diff --git a/.gitignore b/.gitignore index de4b66da3c3..d67eb0f8f62 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ .project testing/ .settings/ +/.vs +/out/build +/CMakeSettings.json diff --git a/.jenkins b/.jenkins index 67491f174cb..c23bd8ac193 100644 --- a/.jenkins +++ b/.jenkins @@ -9,6 +9,38 @@ pipeline { stages { stage('Build') { parallel { + stage('HIP-3.1-HCC') { + agent { + dockerfile { + filename 'Dockerfile.hipcc' + dir 'scripts/docker' + additionalBuildArgs '--pull --build-arg BASE=rocm/dev-ubuntu-18.04:3.1' + label 'rocm-docker && vega' + args '-v /tmp/ccache.kokkos:/tmp/ccache --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video' + } + } + steps { + sh 'ccache --zero-stats' + sh '''rm -rf build && mkdir -p build && cd build && \ + cmake \ + -DCMAKE_BUILD_TYPE=Debug \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER=hipcc \ + -DCMAKE_CXX_FLAGS="-Werror -Wno-unused-command-line-argument -Wno-braced-scalar-init" \ + -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DKokkos_ENABLE_TESTS=ON \ + -DKokkos_ENABLE_HIP=ON \ + -DKokkos_ENABLE_LIBDL=OFF \ + -DKokkos_ENABLE_PROFILING=OFF \ + .. && \ + make -j8 && ctest --output-on-failure''' + } + post { + always { + sh 'ccache --show-stats' + } + } + } stage('CUDA-9.2-Clang') { agent { dockerfile { diff --git a/.travis.yml b/.travis.yml index 1d8cb0b3b42..d1dca455b4d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -31,8 +31,8 @@ branches: env: - - - BACKEND="OPENMP" -# - BACKEND="PTHREAD" +# - BACKEND="OPENMP" + - BACKEND="PTHREAD" - CMAKE_BUILD_TYPE=Debug COVERAGE=yes GTEST_FILTER="-*DeathTest*" - CMAKE_BUILD_TYPE=Debug BACKEND="OPENMP" COVERAGE=yes GTEST_FILTER="-*DeathTest*" # - CMAKE_BUILD_TYPE=Debug BACKEND="PTHREAD" COVERAGE=yes @@ -46,9 +46,6 @@ matrix: - os: osx compiler: gcc # Apple Clang doesn't support OpenMP - - os: osx - compiler: clang - env: BACKEND="OPENMP" - os: osx compiler: clang env: CMAKE_BUILD_TYPE=Debug BACKEND="OPENMP" COVERAGE=yes GTEST_FILTER="-*DeathTest*" @@ -66,6 +63,7 @@ before_script: export HOMEBREW_NO_AUTO_UPDATE=1; brew ls --versions ccache > /dev/null || brew install ccache; export PATH=/usr/local/opt/ccache/libexec:$PATH; + if [[ ${BACKEND} == "OPENMP" ]]; then brew install libomp; fi fi - ccache -z - if [[ ${COVERAGE} ]]; then export CXX="${CXX} --coverage"; fi diff --git a/BUILD.md b/BUILD.md index c4d6c98365d..63dbf7fdb22 100644 --- a/BUILD.md +++ b/BUILD.md @@ -18,7 +18,7 @@ Then for every executable or library in your project: target_link_libraries(myTarget Kokkos::kokkos) ```` That's it! There is no checking Kokkos preprocessor, compiler, or linker flags. -Kokkos propagates all the necesssary flags to your project. +Kokkos propagates all the necessary flags to your project. This means not only is linking to Kokkos easy, but Kokkos itself can actually configure compiler and linker flags for *your* project. If building in-tree, there is no `find_package` and you link with `target_link_libraries(kokkos)`. @@ -40,6 +40,13 @@ cmake ${srcdir} \ ```` which activates the OpenMP backend. All of the options controlling device backends, options, architectures, and third-party libraries (TPLs) are given below. +## Platform-specific Problems + +### Cray + +* The Cray compiler wrappers do static linking by default. This seems to break the Kokkos build. You will likely need to set the environment variable `CRAYPE_LINK_TYPE=dynamic` in order to link correctly. Kokkos warns during configure if this is missing. +* The Cray compiler identifies to CMake as Clang, but it sometimes has its own flags that differ from Clang. We try to include all exceptions, but flag errors may occur in which a Clang-specific flag is passed that the Cray compiler does not recognize. + ## Spack An alternative to manually building with the CMake is to use the Spack package manager. To do so, download the `kokkos-spack` git repo and add to the package list: @@ -63,6 +70,7 @@ For a complete list of Kokkos options, run: ```` spack info kokkos ```` +More details can be found in the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md). #### Spack Development Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable". @@ -73,37 +81,13 @@ spack find -p kokkos ... ```` where `...` is the unique spec identifying the particular Kokkos configuration and version. -A better way to use Spack for doing Kokkos development is the DIY feature of Spack. -If you wish to develop Kokkos itself, go to the Kokkos source folder: -```` -spack diy -u cmake kokkos@diy ... -```` -where `...` is a Spack spec identifying the exact Kokkos configuration. -This then creates a `spack-build` directory where you can run `make`. - -If doing development on a downstream project, you can do almost exactly the same thing. -```` -spack diy -u cmake ${myproject}@${myversion} ... ^kokkos... -```` -where the `...` are the specs for your project and the desired Kokkos configuration. -Again, a `spack-build` directory will be created where you can run `make`. - -Spack has a few idiosyncracies that make building outside of Spack annoying related to Spack forcing use of a compiler wrapper. This can be worked around by having a `-DSpack_WORKAROUND=On` given your CMake. Then add the block of code to your CMakeLists.txt: - -```` -if (Spack_WORKAROUND) - set(SPACK_CXX $ENV{SPACK_CXX}) - if(SPACK_CXX) - set(CMAKE_CXX_COMPILER ${SPACK_CXX} CACHE STRING "the C++ compiler" FORCE) - set(ENV{CXX} ${SPACK_CXX}) - endif() -endif() -```` +A better way to use Spack for doing Kokkos development is the dev-build feature of Spack. +For dev-build details, consult the kokkos-spack repository [README](https://github.com/kokkos/kokkos-spack/blob/master/README.md). # Kokkos Keyword Listing ## Device Backends -Device backends can be enabled by specifiying `-DKokkos_ENABLE_X`. +Device backends can be enabled by specifying `-DKokkos_ENABLE_X`. * Kokkos_ENABLE_CUDA * Whether to build CUDA backend @@ -122,7 +106,7 @@ Device backends can be enabled by specifiying `-DKokkos_ENABLE_X`. * BOOL Default: ON ## Enable Options -Options can be enabled by specifiying `-DKokkos_ENABLE_X`. +Options can be enabled by specifying `-DKokkos_ENABLE_X`. * Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION * Whether to aggressively vectorize loops @@ -157,6 +141,9 @@ Options can be enabled by specifiying `-DKokkos_ENABLE_X`. * Kokkos_ENABLE_DEPRECATED_CODE * Whether to enable deprecated code * BOOL Default: OFF +* Kokkos_ENABLE_EXAMPLES + * Whether to enable building examples + * BOOL Default: OFF * Kokkos_ENABLE_HPX_ASYNC_DISPATCH * Whether HPX supports asynchronous dispatch * BOOL Default: OFF @@ -225,7 +212,7 @@ The following options control `find_package` paths for CMake-based TPLs: * PATH Default: ## Architecture Keywords -Architecture-specific optimizations can be enabled by specifiying `-DKokkos_ARCH_X`. +Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_X`. * Kokkos_ARCH_AMDAVX * Whether to optimize for the AMDAVX architecture diff --git a/CHANGELOG.md b/CHANGELOG.md index 459aeb3d2ea..149e63ca91c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,59 @@ # Change Log +## [3.1.00](https://github.com/kokkos/kokkos/tree/3.1.00) (2020-04-14) +[Full Changelog](https://github.com/kokkos/kokkos/compare/3.0.00...3.1.00) + +**Features:** + +- HIP Support for AMD +- OpenMPTarget Support with clang +- Windows VS19 (Serial) Support [\#1533](https://github.com/kokkos/kokkos/issues/1533) + +**Implemented enhancements:** + +- generate\_makefile.bash should allow tests to be disabled [\#2886](https://github.com/kokkos/kokkos/issues/2886) +- clang/7+cuda/9 build -Werror-unused parameter error in nightly test [\#2884](https://github.com/kokkos/kokkos/issues/2884) +- ScatterView memory space is not user settable [\#2826](https://github.com/kokkos/kokkos/issues/2826) +- clang/8+cuda/10.0 build error with c++17 [\#2809](https://github.com/kokkos/kokkos/issues/2809) +- warnings.... [\#2805](https://github.com/kokkos/kokkos/issues/2805) +- Kokkos version in cpp define [\#2787](https://github.com/kokkos/kokkos/issues/2787) +- Remove Defunct QThreads Backend [\#2751](https://github.com/kokkos/kokkos/issues/2751) +- Improve Kokkos::fence behavior with multiple execution spaces [\#2659](https://github.com/kokkos/kokkos/issues/2659) +- polylithic\(?\) initialization of Kokkos [\#2658](https://github.com/kokkos/kokkos/issues/2658) +- Unnecessary\(?\) check for host execution space initialization from Cuda initialization [\#2652](https://github.com/kokkos/kokkos/issues/2652) +- Kokkos error reporting failures with CUDA GPUs in exclusive mode [\#2471](https://github.com/kokkos/kokkos/issues/2471) +- atomicMax equivalent \(and other atomics\) [\#2401](https://github.com/kokkos/kokkos/issues/2401) +- Fix alignment for Kokkos::complex [\#2255](https://github.com/kokkos/kokkos/issues/2255) +- Warnings with Cuda 10.1 [\#2206](https://github.com/kokkos/kokkos/issues/2206) +- dual view with Kokkos::ViewAllocateWithoutInitializing [\#2188](https://github.com/kokkos/kokkos/issues/2188) +- Check error code from cudaOccupancyMaxActiveBlocksPerMultiprocessor [\#2172](https://github.com/kokkos/kokkos/issues/2172) +- Add non-member Kokkos::resize/realloc for DualView [\#2170](https://github.com/kokkos/kokkos/issues/2170) +- Construct DualView without initialization [\#2046](https://github.com/kokkos/kokkos/issues/2046) +- Expose is\_assignable to determine if one view can be assigned to another [\#1936](https://github.com/kokkos/kokkos/issues/1936) +- profiling label [\#1935](https://github.com/kokkos/kokkos/issues/1935) +- team\_broadcast of bool failed on CUDA backend [\#1908](https://github.com/kokkos/kokkos/issues/1908) +- View static\_extent [\#660](https://github.com/kokkos/kokkos/issues/660) +- Misleading Kokkos::Cuda::initialize ERROR message when compiled for wrong GPU architecture [\#1944](https://github.com/kokkos/kokkos/issues/1944) +- Cryptic Error When Malloc Fails [\#2164](https://github.com/kokkos/kokkos/issues/2164) +- Drop support for intermediate standards in CMake [\#2336](https://github.com/kokkos/kokkos/issues/2336) + +**Fixed bugs:** + +- DualView sync\_device with length zero creates cuda errors [\#2946](https://github.com/kokkos/kokkos/issues/2946) +- building with nvcc and clang \(or clang based XL\) as host compiler: "Kokkos::atomic\_fetch\_min\(volatile int \*, int\)" has already been defined [\#2903](https://github.com/kokkos/kokkos/issues/2903) +- Cuda 9.1,10.1 debug builds failing due to -Werror=unused-parameter [\#2880](https://github.com/kokkos/kokkos/issues/2880) +- clang -Werror: Kokkos\_FixedBufferMemoryPool.hpp:140:28: error: unused parameter 'alloc\_size' [\#2869](https://github.com/kokkos/kokkos/issues/2869) +- intel/16.0.1, intel/17.0.1 nightly build failures with debugging enabled [\#2867](https://github.com/kokkos/kokkos/issues/2867) +- intel/16.0.1 debug build errors [\#2863](https://github.com/kokkos/kokkos/issues/2863) +- xl/16.1.1 with cpp14, openmp build, nightly test failures [\#2856](https://github.com/kokkos/kokkos/issues/2856) +- Intel nightly test failures: team\_vector [\#2852](https://github.com/kokkos/kokkos/issues/2852) +- Kokkos Views with intmax/2\_ROOT variables") CMAKE_POLICY(SET CMP0074 NEW) ENDIF() -# Load either the real TriBITS or a TriBITS wrapper +# Load either the real TriBITS or a TriBITS wrapper # for certain utility functions that are universal (like GLOBAL_SET) INCLUDE(${KOKKOS_SRC_PATH}/cmake/fake_tribits.cmake) @@ -118,18 +129,14 @@ ENDIF() # These are the variables we will append to as we go # I really wish these were regular variables # but scoping issues can make it difficult -GLOBAL_RESET(KOKKOS_COMPILE_OPTIONS) -GLOBAL_RESET(KOKKOS_LINK_OPTIONS) -GLOBAL_RESET(KOKKOS_CUDA_OPTIONS) -GLOBAL_RESET(KOKKOS_CUDAFE_OPTIONS) -GLOBAL_RESET(KOKKOS_XCOMPILER_OPTIONS) +GLOBAL_SET(KOKKOS_COMPILE_OPTIONS) +GLOBAL_SET(KOKKOS_LINK_OPTIONS) +GLOBAL_SET(KOKKOS_CUDA_OPTIONS) +GLOBAL_SET(KOKKOS_CUDAFE_OPTIONS) +GLOBAL_SET(KOKKOS_XCOMPILER_OPTIONS) # We need to append text here for making sure TPLs # we import are available for an installed Kokkos -GLOBAL_RESET(KOKKOS_TPL_EXPORTS) -# We need these for controlling the exact -std flag -GLOBAL_RESET(KOKKOS_DONT_ALLOW_EXTENSIONS) -GLOBAL_RESET(KOKKOS_USE_CXX_EXTENSIONS) -GLOBAL_RESET(KOKKOS_CXX_STANDARD_FEATURE) +GLOBAL_SET(KOKKOS_TPL_EXPORTS) # Include a set of Kokkos-specific wrapper functions that # will either call raw CMake or TriBITS @@ -137,6 +144,9 @@ GLOBAL_RESET(KOKKOS_CXX_STANDARD_FEATURE) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_tribits.cmake) +# Check the environment and set certain variables +# to allow platform-specific checks +INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_check_env.cmake) # The build environment setup goes in the following steps # 1) Check all the enable options. This includes checking Kokkos_DEVICES # 2) Check the compiler ID (type and version) @@ -187,14 +197,21 @@ IF (KOKKOS_HAS_TRILINOS) # Because Tribits doesn't use lists, it uses spaces for the list of CXX flags # we have to match the annoying behavior STRING(REPLACE ";" " " KOKKOSCORE_COMPILE_OPTIONS "${KOKKOS_COMPILE_OPTIONS}") - STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}") - FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS}) - SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}") - ENDFOREACH() + LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS}) + LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS ${KOKKOS_CUDA_OPTIONS}) FOREACH(XCOMP_FLAG ${KOKKOS_XCOMPILER_OPTIONS}) SET(KOKKOSCORE_XCOMPILER_OPTIONS "${KOKKOSCORE_XCOMPILER_OPTIONS} -Xcompiler ${XCOMP_FLAG}") + LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcompiler ${XCOMP_FLAG}) ENDFOREACH() - SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS} ${KOKKOSCORE_XCOMPILER_OPTIONS}") + SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_COMPILE_OPTIONS} ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} ${KOKKOSCORE_XCOMPILER_OPTIONS}") + IF (KOKKOS_ENABLE_CUDA) + STRING(REPLACE ";" " " KOKKOSCORE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONS}") + FOREACH(CUDAFE_FLAG ${KOKKOS_CUDAFE_OPTIONS}) + SET(KOKKOSCORE_CUDAFE_OPTIONS "${KOKKOSCORE_CUDAFE_OPTIONS} -Xcudafe ${CUDAFE_FLAG}") + LIST(APPEND KOKKOS_ALL_COMPILE_OPTIONS -Xcudafe ${CUDAFE_FLAG}) + ENDFOREACH() + SET(KOKKOSCORE_CXX_FLAGS "${KOKKOSCORE_CXX_FLAGS} ${KOKKOSCORE_CUDA_OPTIONS} ${KOKKOSCORE_CUDAFE_OPTIONS}") + ENDIF() # Both parent scope and this package # In ProjectCompilerPostConfig.cmake, we capture the "global" flags Trilinos wants in # TRILINOS_TOPLEVEL_CXX_FLAGS @@ -203,6 +220,8 @@ IF (KOKKOS_HAS_TRILINOS) #CMAKE_CXX_FLAGS will get added to Kokkos and Kokkos dependencies automatically here #These flags get set up in KOKKOS_PACKAGE_DECL, which means they #must be configured before KOKKOS_PACKAGE_DECL + SET(KOKKOS_ALL_COMPILE_OPTIONS + $<$:${KOKKOS_ALL_COMPILE_OPTIONS}>) ENDIF() KOKKOS_PACKAGE_DECL() @@ -250,7 +269,7 @@ INSTALL(FILES "${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h" DESTINATION ${CM IF (HAS_PARENT) FOREACH(DEV Kokkos_ENABLED_DEVICES) #I would much rather not make these cache variables or global properties, but I can't - #make any guarantees on whether PARENT_SCOPE is good enough to make + #make any guarantees on whether PARENT_SCOPE is good enough to make #these variables visible where I need them SET(Kokkos_ENABLE_${DEV} ON PARENT_SCOPE) SET_PROPERTY(GLOBAL PROPERTY Kokkos_ENABLE_${DEV} ON) diff --git a/Copyright.txt b/Copyright.txt index 06184796b26..5e2f8d8647b 100644 --- a/Copyright.txt +++ b/Copyright.txt @@ -1,13 +1,13 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 3.0 // Copyright (2020) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). -// +// // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,6 +36,6 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// +// // ************************************************************************ //@HEADER diff --git a/Makefile.kokkos b/Makefile.kokkos index 77147d59c8b..afb3a371e9c 100644 --- a/Makefile.kokkos +++ b/Makefile.kokkos @@ -1,6 +1,11 @@ # Default settings common options. -# Options: Cuda,ROCm,OpenMP,Pthread,Qthreads,Serial +KOKKOS_VERSION_MAJOR = 3 +KOKKOS_VERSION_MINOR = 1 +KOKKOS_VERSION_PATCH = 0 +KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc) + +# Options: Cuda,HIP,ROCm,OpenMP,Pthread,Serial #KOKKOS_DEVICES ?= "OpenMP" KOKKOS_DEVICES ?= "Pthread" # Options: @@ -8,7 +13,7 @@ KOKKOS_DEVICES ?= "Pthread" # NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72,Turing75 # ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2 # IBM: BGQ,Power7,Power8,Power9 -# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega +# AMD-GPUS: Vega900,Vega906 # AMD-CPUS: AMDAVX,Ryzen,EPYC KOKKOS_ARCH ?= "" # Options: yes,no @@ -29,6 +34,9 @@ KOKKOS_STANDALONE_CMAKE ?= "no" # Options: force_uvm,use_ldg,rdc,enable_lambda,enable_constexpr KOKKOS_CUDA_OPTIONS ?= "" +# Options: rdc +KOKKOS_HIP_OPTIONS ?= "" + # Default settings specific options. # Options: enable_async_dispatch KOKKOS_HPX_OPTIONS ?= "" @@ -76,29 +84,50 @@ KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR := $(call kokkos_has_string,$(KOKKOS_CUDA_OPT KOKKOS_INTERNAL_HPX_ENABLE_ASYNC_DISPATCH := $(call kokkos_has_string,$(KOKKOS_HPX_OPTIONS),enable_async_dispatch) KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti) +KOKKOS_INTERNAL_HIP_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_HIP_OPTIONS),rdc) # Check for Kokkos Host Execution Spaces one of which must be on. KOKKOS_INTERNAL_USE_OPENMP := $(call kokkos_has_string,$(subst OpenMPTarget,,$(KOKKOS_DEVICES)),OpenMP) KOKKOS_INTERNAL_USE_PTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Pthread) -KOKKOS_INTERNAL_USE_QTHREADS := $(call kokkos_has_string,$(KOKKOS_DEVICES),Qthreads) KOKKOS_INTERNAL_USE_HPX := $(call kokkos_has_string,$(KOKKOS_DEVICES),HPX) KOKKOS_INTERNAL_USE_SERIAL := $(call kokkos_has_string,$(KOKKOS_DEVICES),Serial) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) - ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0) - ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0) - KOKKOS_INTERNAL_USE_SERIAL := 1 - endif + ifeq ($(KOKKOS_INTERNAL_USE_HPX), 0) + KOKKOS_INTERNAL_USE_SERIAL := 1 endif endif endif # Check for other Execution Spaces. KOKKOS_INTERNAL_USE_CUDA := $(call kokkos_has_string,$(KOKKOS_DEVICES),Cuda) -KOKKOS_INTERNAL_USE_ROCM := $(call kokkos_has_string,$(KOKKOS_DEVICES),ROCm) +KOKKOS_INTERNAL_USE_HIP := $(call kokkos_has_string,$(KOKKOS_DEVICES),HIP) KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),OpenMPTarget) +KOKKOS_DEVICELIST = +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + KOKKOS_DEVICELIST += Serial +endif +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + KOKKOS_DEVICELIST += OpenMP +endif +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + KOKKOS_DEVICELIST += Threads +endif +ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) + KOKKOS_DEVICELIST += HPX +endif +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_DEVICELIST += Cuda +endif +ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) + KOKKOS_DEVICELIST += HIP +endif +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + KOKKOS_DEVICELIST += OPENMPTARGET +endif + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) ifeq ($(origin CUDA_PATH), undefined) @@ -122,10 +151,11 @@ KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_VE KOKKOS_INTERNAL_COMPILER_PGI := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),PGI) KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)) KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)) -KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); echo "$(shell $(CXX) --version 2>&1 | grep nvcc | wc -l)>0" | bc)) +KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell echo "$(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l)>0" | bc)) KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang) KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),Apple LLVM) KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC) +KOKKOS_INTERNAL_COMPILER_GCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),GCC) # Check Host Compiler if using NVCC through nvcc_wrapper ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) @@ -174,20 +204,20 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1) KOKKOS_INTERNAL_COMPILER_WARNINGS = else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized + KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized else ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized + KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized else ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized + KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) # TODO check if cray accepts GNU style warnings KOKKOS_INTERNAL_COMPILER_WARNINGS = else #gcc - KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized + KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wunused-parameter -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized endif endif endif @@ -224,7 +254,12 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_IBM_XL_OMP45_WORKAROUND -qsmp=omp -qoffload -qnoeh else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp + #KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_WORKAROUND_OPENMPTARGET_CLANG -fopenmp -fopenmp=libomp + KOKKOS_INTERNAL_OPENMPTARGET_LIB := -lomptarget + else + #Assume GCC + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -fopenmp -foffload=nvptx-none endif endif @@ -347,11 +382,8 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(call kokkos_has_string,$(KOKKOS_ARCH),AMDAVX) KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(call kokkos_has_string,$(KOKKOS_ARCH),Ryzen) KOKKOS_INTERNAL_USE_ARCH_EPYC := $(call kokkos_has_string,$(KOKKOS_ARCH),EPYC) -KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(call kokkos_has_string,$(KOKKOS_ARCH),Kaveri) -KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(call kokkos_has_string,$(KOKKOS_ARCH),Carrizo) -KOKKOS_INTERNAL_USE_ARCH_FIJI := $(call kokkos_has_string,$(KOKKOS_ARCH),Fiji) -KOKKOS_INTERNAL_USE_ARCH_VEGA := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega) -KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(call kokkos_has_string,$(KOKKOS_ARCH),gfx901) +KOKKOS_INTERNAL_USE_ARCH_VEGA900 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega900) +KOKKOS_INTERNAL_USE_ARCH_VEGA906 := $(call kokkos_has_string,$(KOKKOS_ARCH),Vega906) # Any AVX? KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_WSM)) @@ -424,6 +456,10 @@ tmp := $(call kokkos_append_header,'\#error "Do not include $(KOKKOS_CONFIG_HEAD tmp := $(call kokkos_append_header,'\#else') tmp := $(call kokkos_append_header,'\#define KOKKOS_CORE_CONFIG_H') tmp := $(call kokkos_append_header,'\#endif') + +tmp := $(call kokkos_append_header,"") +tmp := $(call kokkos_append_header,"\#define KOKKOS_VERSION $(KOKKOS_VERSION)") +tmp := $(call kokkos_append_header,"") tmp := $(call kokkos_append_header,"/* Execution Spaces */") @@ -436,9 +472,15 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_ROCM') tmp := $(call kokkos_append_header,'\#define KOKKOS_IMPL_ROCM_CLANG_WORKAROUND 1') endif +ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) + tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_HIP') +endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMPTARGET') + ifeq ($(KOKKOS_INTERNAL_COMPILER_GCC), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_WORKAROUND_OPENMPTARGET_GCC") + endif endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) @@ -449,10 +491,6 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_THREADS") endif -ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) - tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_QTHREADS") -endif - ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HPX") endif @@ -960,6 +998,14 @@ endif # Figure out the architecture flag for Cuda. ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_INTERNAL_USE_CUDA_ARCH=1 +endif +ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_INTERNAL_USE_CUDA_ARCH=1 + endif +endif +ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) @@ -968,7 +1014,17 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) else $(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang (got version string $(KOKKOS_CXX_VERSION)) ) endif + KOKKOS_INTERNAL_USE_CUDA_ARCH = 1 +endif +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march + endif + KOKKOS_INTERNAL_USE_CUDA_ARCH = 1 +endif + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA_ARCH), 1) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER") tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KEPLER30") @@ -1036,55 +1092,49 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) + endif + endif + endif + ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) + KOKKOS_CXXFLAGS += --expt-extended-lambda endif endif + # Figure out the architecture flag for ROCm. -ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) +ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) # Lets start with adding architecture defines - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1) - tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 701") - tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_KAVERI") - KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1) - tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 801") - tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_CARRIZO") - KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801 - endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1) - tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 803") - tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_FIJI") - KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803 + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA900), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 900") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA900") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx900 endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1) - tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 900") - tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA") - KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900 + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA906), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_HIP 906") + tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VEGA906") + KOKKOS_INTERNAL_HIP_ARCH_FLAG := --amdgpu-target=gfx906 endif - ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1) - tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ROCM 901") - tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_GFX901") - KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901 + + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HIP/*.hpp) + + KOKKOS_CXXFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) + KOKKOS_LDFLAGS+=$(KOKKOS_INTERNAL_HIP_ARCH_FLAG) + + ifeq ($(KOKKOS_INTERNAL_HIP_USE_RELOC), 1) + tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE") + KOKKOS_CXXFLAGS+=-fgpu-rdc + KOKKOS_LDFLAGS+=-fgpu-rdc + else + KOKKOS_CXXFLAGS+=-fno-gpu-rdc + KOKKOS_LDFLAGS+=-fno-gpu-rdc endif - - - KOKKOS_INTERNAL_HCC_PATH := $(shell which $(CXX)) - ROCM_HCC_PATH ?= $(KOKKOS_INTERNAL_HCC_PATH:/bin/clang++=) - - KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags) - KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm - KOKKOS_CXXLDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm - KOKKOS_TPL_LIBRARY_NAMES += hc_am m - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG) - - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp) -ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/ROCm/*.cpp) -endif - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp) endif + KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1) ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) @@ -1135,7 +1185,7 @@ endif endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) - KOKKOS_SRC += $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) @@ -1143,6 +1193,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) endif KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) + KOKKOS_LIBS += $(KOKKOS_INTERNAL_OPENMPTARGET_LIB) endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) @@ -1172,22 +1223,6 @@ endif KOKKOS_TPL_LIBRARY_NAMES += pthread endif -ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) - ifneq ($(KOKKOS_CMAKE), yes) - ifneq ($(QTHREADS_PATH),) - KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include - KOKKOS_LIBDIRS += -L$(QTHREADS_PATH)/lib - KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib - KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include - KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64 - endif - KOKKOS_LIBS += -lqthread - KOKKOS_TPL_LIBRARY_NAMES += qthread - endif -endif - ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/HPX/*.hpp) diff --git a/Makefile.targets b/Makefile.targets index 0a1f5220161..18e37a71f71 100644 --- a/Makefile.targets +++ b/Makefile.targets @@ -55,6 +55,17 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) +Kokkos_HIP_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Space.cpp +Kokkos_HIP_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Instance.cpp +Kokkos_HIP_KernelLaunch.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_KernelLaunch.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_KernelLaunch.cpp +Kokkos_HIP_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/HIP/Kokkos_HIP_Locks.cpp +endif + ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) Kokkos_ROCm_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp @@ -79,13 +90,6 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1) endif endif -ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) -Kokkos_QthreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp -Kokkos_Qthreads_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp -endif - ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp @@ -106,10 +110,12 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp +Kokkos_OpenMPTarget_Instance.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp -#Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp -# $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp +Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp endif Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp diff --git a/README.md b/README.md index 322dabfdab7..a04df9eb9b6 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,8 @@ CUDA, HPX, OpenMP and Pthreads as backend programming models with several other backends in development. Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem, -which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as -profiling and debugging tools (https://github.com/kokkos/kokkos-tools). +which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as +profiling and debugging tools (https://github.com/kokkos/kokkos-tools). # Learning about Kokkos @@ -23,7 +23,7 @@ For questions find us on Slack: https://kokkosteam.slack.com or open a github is For non-public questions send an email to crtrott(at)sandia.gov -A separate repository with extensive tutorial material can be found under +A separate repository with extensive tutorial material can be found under https://github.com/kokkos/kokkos-tutorials. Furthermore, the 'example/tutorial' directory provides step by step tutorial @@ -41,12 +41,12 @@ To learn more about Kokkos consider watching one of our presentations: # Contributing to Kokkos -We are open and try to encourage contributions from external developers. +We are open and try to encourage contributions from external developers. To do so please first open an issue describing the contribution and then issue a pull request against the develop branch. For larger features it may be good -to get guidance from the core development team first through the github issue. +to get guidance from the core development team first through the github issue. -Note that Kokkos Core is licensed under standard 3-clause BSD terms of use. +Note that Kokkos Core is licensed under standard 3-clause BSD terms of use. Which means contributing to Kokkos allows anyone else to use your contributions not just for public purposes but also for closed source commercial projects. For specifics see the LICENSE file contained in the repository or distribution. @@ -94,9 +94,9 @@ For specifics see the LICENSE file contained in the repository or distribution. * Intel 18.2.199 (with gcc 4.9.3) ### Primary tested compilers on ARM (Cavium ThunderX2) -* GCC 7.2.0 +* GCC 7.2.0 * ARM/Clang 18.4.0 - + ### Other compilers working: * X86: * Cygwin 2.1.0 64bit with gcc 4.9.3 @@ -110,47 +110,47 @@ For specifics see the LICENSE file contained in the repository or distribution. Primary tested compiler are passing in release mode -with warnings as errors. They also are tested with a comprehensive set of +with warnings as errors. They also are tested with a comprehensive set of backend combinations (i.e. OpenMP, Pthreads, Serial, OpenMP+Serial, ...). We are using the following set of flags: -* GCC: +* GCC: ```` - -Wall -Wshadow -pedantic + -Wall -Wunused-parameter -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits - -Wignored-qualifiers -Wempty-body + -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized ```` -* Intel: +* Intel: ```` - -Wall -Wshadow -pedantic - -Werror -Wsign-compare -Wtype-limits + -Wall -Wunused-parameter -Wshadow -pedantic + -Werror -Wsign-compare -Wtype-limits -Wuninitialized ```` -* Clang: +* Clang: ```` - -Wall -Wshadow -pedantic - -Werror -Wsign-compare -Wtype-limits + -Wall -Wunused-parameter -Wshadow -pedantic + -Werror -Wsign-compare -Wtype-limits -Wuninitialized - ```` + ```` -* NVCC: +* NVCC: ```` - -Wall -Wshadow -pedantic - -Werror -Wsign-compare -Wtype-limits + -Wall -Wunused-parameter -Wshadow -pedantic + -Werror -Wsign-compare -Wtype-limits -Wuninitialized ```` -Other compilers are tested occasionally, in particular when pushing from develop to +Other compilers are tested occasionally, in particular when pushing from develop to master branch. These are tested less rigorously without `-Werror` and only for a select set of backends. # Building and Installing Kokkos -Kokkos provide a CMake build system and a raw Makefile build system. +Kokkos provide a CMake build system and a raw Makefile build system. The CMake build system is strongly encouraged and will be the most rigorously supported in future releases. Full details are given in the [build instructions](BUILD.md). Basic setups are shown here: ## CMake -The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`: +The best way to install Kokkos is using the CMake build system. Assuming Kokkos lives in `$srcdir`: ```` cmake $srcdir \ -DCMAKE_CXX_COMPILER=$path_to_compiler \ @@ -162,9 +162,9 @@ cmake $srcdir \ ```` then simply type `make install`. The Kokkos CMake package will then be installed in `$path_to_install` to be used by downstream packages. -To validate the Kokkos build, configure with +To validate the Kokkos build, configure with ```` - -DKokkos_ENABLE_TESTS=On + -DKokkos_ENABLE_TESTS=On ```` and run `make test` after completing the build. @@ -209,7 +209,7 @@ For a complete list of Kokkos options, run: spack info kokkos ```` Spack currently installs packages to a location determined by a unique hash. This hash name is not really "human readable". -Generally, Spack usage should never really require you to reference the computer-generated unique install folder. +Generally, Spack usage should never really require you to reference the computer-generated unique install folder. More details are given in the [build instructions](BUILD.md). If you must know, you can locate Spack Kokkos installations with: ```` spack find -p kokkos ... @@ -217,7 +217,7 @@ spack find -p kokkos ... where `...` is the unique spec identifying the particular Kokkos configuration and version. -## Raw Makefile +## Raw Makefile A bash script is provided to generate raw makefiles. To install Kokkos as a library create a build directory and run the following ```` @@ -240,33 +240,33 @@ changing the device type for which to build. For individual projects, it may be preferable to build Kokkos inline rather than link to an installed package. The main reason is that you may otherwise need many different configurations of Kokkos installed depending on the required compile time -features an application needs. For example there is only one default +features an application needs. For example there is only one default execution space, which means you need different installations to have OpenMP or Pthreads as the default space. Also for the CUDA backend there are certain -choices, such as allowing relocatable device code, which must be made at +choices, such as allowing relocatable device code, which must be made at installation time. Building Kokkos inline uses largely the same process -as compiling an application against an installed Kokkos library. +as compiling an application against an installed Kokkos library. For CMake, this means copying over the Kokkos source code into your project and adding `add_subdirectory(kokkos)` to your CMakeLists.txt. -For raw Makefiles, see the example benchmarks/bytes_and_flops/Makefile which can be used with an installed library and or an inline build. +For raw Makefiles, see the example benchmarks/bytes_and_flops/Makefile which can be used with an installed library and or an inline build. # Kokkos and CUDA UVM -Kokkos does support UVM as a specific memory space called CudaUVMSpace. -Allocations made with that space are accessible from host and device. +Kokkos does support UVM as a specific memory space called CudaUVMSpace. +Allocations made with that space are accessible from host and device. You can tell Kokkos to use that as the default space for Cuda allocations. In either case UVM comes with a number of restrictions: -* You can't access allocations on the host while a kernel is potentially -running. This will lead to segfaults. To avoid that you either need to +* You can't access allocations on the host while a kernel is potentially +running. This will lead to segfaults. To avoid that you either need to call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or you can set the environment variable CUDA_LAUNCH_BLOCKING=1. -* In multi socket multi GPU machines without NVLINK, UVM defaults +* In multi socket multi GPU machines without NVLINK, UVM defaults to using zero copy allocations for technical reasons related to using multiple GPUs from the same process. If an executable doesn't do that (e.g. each -MPI rank of an application uses a single GPU [can be the same GPU for +MPI rank of an application uses a single GPU [can be the same GPU for multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1. -This will enforce proper UVM allocations, but can lead to errors if +This will enforce proper UVM allocations, but can lead to errors if more than a single GPU is used by a single process. diff --git a/algorithms/src/Kokkos_Random.hpp b/algorithms/src/Kokkos_Random.hpp index 078db18eddf..0a796752365 100644 --- a/algorithms/src/Kokkos_Random.hpp +++ b/algorithms/src/Kokkos_Random.hpp @@ -537,6 +537,145 @@ struct rand > { } }; +template +class Random_XorShift1024_Pool; + +namespace Impl { + +template +struct Random_XorShift1024_State { + uint64_t state_[16]; + KOKKOS_DEFAULTED_FUNCTION + Random_XorShift1024_State() = default; + + template + KOKKOS_FUNCTION Random_XorShift1024_State(const StateViewType& v, + int state_idx) { + for (int i = 0; i < 16; i++) state_[i] = v(state_idx, i); + } + + KOKKOS_FUNCTION + uint64_t operator[](const int i) const { return state_[i]; } + + KOKKOS_FUNCTION + uint64_t& operator[](const int i) { return state_[i]; } +}; + +template <> +struct Random_XorShift1024_State { + uint64_t* state_; + const int stride_; + KOKKOS_FUNCTION + Random_XorShift1024_State() : state_(nullptr), stride_(1){}; + + template + KOKKOS_FUNCTION Random_XorShift1024_State(const StateViewType& v, + int state_idx) + : state_(&v(state_idx, 0)), stride_(v.stride_1()) {} + + KOKKOS_FUNCTION + uint64_t operator[](const int i) const { return state_[i * stride_]; } + + KOKKOS_FUNCTION + uint64_t& operator[](const int i) { return state_[i * stride_]; } +}; + +template +struct Random_XorShift1024_UseCArrayState : std::true_type {}; + +#ifdef KOKKOS_ENABLE_CUDA +template <> +struct Random_XorShift1024_UseCArrayState : std::false_type {}; +#endif +#ifdef KOKKOS_ENABLE_HIP +template <> +struct Random_XorShift1024_UseCArrayState + : std::false_type {}; +#endif +#ifdef KOKKOS_ENABLE_OPENMPTARGET +template <> +struct Random_XorShift1024_UseCArrayState + : std::false_type {}; +#endif + +template +struct Random_UniqueIndex { + using locks_view_type = View; + KOKKOS_FUNCTION + static int get_state_idx(const locks_view_type) { +#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + const int i = ExecutionSpace::hardware_thread_id(); +#else + const int i = ExecutionSpace::impl_hardware_thread_id(); +#endif + return i; +#else + return 0; +#endif + } +}; + +#ifdef KOKKOS_ENABLE_CUDA +template <> +struct Random_UniqueIndex { + using locks_view_type = View; + KOKKOS_FUNCTION + static int get_state_idx(const locks_view_type& locks_) { +#ifdef __CUDA_ARCH__ + const int i_offset = + (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; + int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * + blockDim.x * blockDim.y * blockDim.z + + i_offset) % + locks_.extent(0); + while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { + i += blockDim.x * blockDim.y * blockDim.z; + if (i >= static_cast(locks_.extent(0))) { + i = i_offset; + } + } + return i; +#else + (void)locks_; + return 0; +#endif + } +}; +#endif + +#ifdef KOKKOS_ENABLE_HIP +template <> +struct Random_UniqueIndex { + using locks_view_type = View; + KOKKOS_FUNCTION + static int get_state_idx(const locks_view_type& locks_) { +#ifdef __HIP_DEVICE_COMPILE__ + const int i_offset = + (hipThreadIdx_x * hipBlockDim_y + hipThreadIdx_y) * hipBlockDim_z + + hipThreadIdx_z; + int i = (((hipBlockIdx_x * hipGridDim_y + hipBlockIdx_y) * hipGridDim_z + + hipBlockIdx_z) * + hipBlockDim_x * hipBlockDim_y * hipBlockDim_z + + i_offset) % + locks_.extent(0); + while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { + i += hipBlockDim_x * hipBlockDim_y * hipBlockDim_z; + if (i >= static_cast(locks_.extent(0))) { + i = i_offset; + } + } + return i; +#else + (void)locks_; + return 0; +#endif + } +}; +#endif + +} // namespace Impl + template class Random_XorShift64_Pool; @@ -550,10 +689,10 @@ class Random_XorShift64 { public: typedef DeviceType device_type; - enum { MAX_URAND = 0xffffffffU }; - enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; - enum { MAX_RAND = static_cast(0xffffffff / 2) }; - enum { MAX_RAND64 = static_cast(0xffffffffffffffffLL / 2 - 1) }; + constexpr static uint32_t MAX_URAND = std::numeric_limits::max(); + constexpr static uint64_t MAX_URAND64 = std::numeric_limits::max(); + constexpr static int32_t MAX_RAND = std::numeric_limits::max(); + constexpr static int64_t MAX_RAND64 = std::numeric_limits::max(); KOKKOS_INLINE_FUNCTION Random_XorShift64(uint64_t state, int state_idx = 0) @@ -637,10 +776,12 @@ class Random_XorShift64 { } KOKKOS_INLINE_FUNCTION - float frand() { return 1.0f * urand64() / MAX_URAND64; } + float frand() { return urand64() / static_cast(MAX_URAND64); } KOKKOS_INLINE_FUNCTION - float frand(const float& range) { return range * urand64() / MAX_URAND64; } + float frand(const float& range) { + return range * urand64() / static_cast(MAX_URAND64); + } KOKKOS_INLINE_FUNCTION float frand(const float& start, const float& end) { @@ -648,10 +789,12 @@ class Random_XorShift64 { } KOKKOS_INLINE_FUNCTION - double drand() { return 1.0 * urand64() / MAX_URAND64; } + double drand() { return urand64() / static_cast(MAX_URAND64); } KOKKOS_INLINE_FUNCTION - double drand(const double& range) { return range * urand64() / MAX_URAND64; } + double drand(const double& range) { + return range * urand64() / static_cast(MAX_URAND64); + } KOKKOS_INLINE_FUNCTION double drand(const double& start, const double& end) { @@ -662,6 +805,11 @@ class Random_XorShift64 { // number KOKKOS_INLINE_FUNCTION double normal() { +#ifndef __HIP_DEVICE_COMPILE__ // FIXME_HIP + using std::sqrt; +#else + using ::sqrt; +#endif double S = 2.0; double U; while (S >= 1.0) { @@ -669,7 +817,7 @@ class Random_XorShift64 { const double V = 2.0 * drand() - 1.0; S = U * U + V * V; } - return U * std::sqrt(-2.0 * log(S) / S); + return U * sqrt(-2.0 * log(S) / S); } KOKKOS_INLINE_FUNCTION @@ -681,9 +829,10 @@ class Random_XorShift64 { template class Random_XorShift64_Pool { private: - typedef View lock_type; + using execution_space = typename DeviceType::execution_space; + typedef View locks_type; typedef View state_data_type; - lock_type locks_; + locks_type locks_; state_data_type state_; int num_states_; @@ -695,11 +844,8 @@ class Random_XorShift64_Pool { Random_XorShift64_Pool() { num_states_ = 0; } Random_XorShift64_Pool(uint64_t seed) { num_states_ = 0; -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE - init(seed, DeviceType::max_hardware_threads()); -#else - init(seed, DeviceType::impl_max_hardware_threads()); -#endif + + init(seed, execution_space().concurrency()); } KOKKOS_INLINE_FUNCTION @@ -719,11 +865,11 @@ class Random_XorShift64_Pool { num_states_ = num_states; - locks_ = lock_type("Kokkos::Random_XorShift64::locks", num_states_); + locks_ = locks_type("Kokkos::Random_XorShift64::locks", num_states_); state_ = state_data_type("Kokkos::Random_XorShift64::state", num_states_); typename state_data_type::HostMirror h_state = create_mirror_view(state_); - typename lock_type::HostMirror h_lock = create_mirror_view(locks_); + typename locks_type::HostMirror h_lock = create_mirror_view(locks_); // Execute on the HostMirror's default execution space. Random_XorShift64 @@ -746,13 +892,8 @@ class Random_XorShift64_Pool { KOKKOS_INLINE_FUNCTION Random_XorShift64 get_state() const { -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int i = DeviceType::hardware_thread_id(); - ; -#else - const int i = DeviceType::impl_hardware_thread_id(); - ; -#endif + const int i = + Impl::Random_UniqueIndex::get_state_idx(locks_); return Random_XorShift64(state_(i), i); } @@ -765,35 +906,35 @@ class Random_XorShift64_Pool { KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift64& state) const { state_(state.state_idx_) = state.state_; + locks_(state.state_idx_) = 0; } }; -template -class Random_XorShift1024_Pool; - template class Random_XorShift1024 { + using execution_space = typename DeviceType::execution_space; + private: int p_; const int state_idx_; - uint64_t state_[16]; + Impl::Random_XorShift1024_State< + Impl::Random_XorShift1024_UseCArrayState::value> + state_; friend class Random_XorShift1024_Pool; public: typedef Random_XorShift1024_Pool pool_type; typedef DeviceType device_type; - enum { MAX_URAND = 0xffffffffU }; - enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; - enum { MAX_RAND = static_cast(0xffffffffU / 2) }; - enum { MAX_RAND64 = static_cast(0xffffffffffffffffULL / 2 - 1) }; + constexpr static uint32_t MAX_URAND = std::numeric_limits::max(); + constexpr static uint64_t MAX_URAND64 = std::numeric_limits::max(); + constexpr static int32_t MAX_RAND = std::numeric_limits::max(); + constexpr static int64_t MAX_RAND64 = std::numeric_limits::max(); KOKKOS_INLINE_FUNCTION Random_XorShift1024(const typename pool_type::state_data_type& state, int p, int state_idx = 0) - : p_(p), state_idx_(state_idx) { - for (int i = 0; i < 16; i++) state_[i] = state(state_idx, i); - } + : p_(p), state_idx_(state_idx), state_(state, state_idx) {} KOKKOS_INLINE_FUNCTION uint32_t urand() { @@ -876,10 +1017,12 @@ class Random_XorShift1024 { } KOKKOS_INLINE_FUNCTION - float frand() { return 1.0f * urand64() / MAX_URAND64; } + float frand() { return urand64() / static_cast(MAX_URAND64); } KOKKOS_INLINE_FUNCTION - float frand(const float& range) { return range * urand64() / MAX_URAND64; } + float frand(const float& range) { + return range * urand64() / static_cast(MAX_URAND64); + } KOKKOS_INLINE_FUNCTION float frand(const float& start, const float& end) { @@ -887,10 +1030,12 @@ class Random_XorShift1024 { } KOKKOS_INLINE_FUNCTION - double drand() { return 1.0 * urand64() / MAX_URAND64; } + double drand() { return urand64() / static_cast(MAX_URAND64); } KOKKOS_INLINE_FUNCTION - double drand(const double& range) { return range * urand64() / MAX_URAND64; } + double drand(const double& range) { + return range * urand64() / static_cast(MAX_URAND64); + } KOKKOS_INLINE_FUNCTION double drand(const double& start, const double& end) { @@ -901,6 +1046,11 @@ class Random_XorShift1024 { // number KOKKOS_INLINE_FUNCTION double normal() { +#ifndef KOKKOS_ENABLE_HIP // FIXME_HIP + using std::sqrt; +#else + using ::sqrt; +#endif double S = 2.0; double U; while (S >= 1.0) { @@ -908,7 +1058,7 @@ class Random_XorShift1024 { const double V = 2.0 * drand() - 1.0; S = U * U + V * V; } - return U * std::sqrt(-2.0 * log(S) / S); + return U * sqrt(-2.0 * log(S) / S); } KOKKOS_INLINE_FUNCTION @@ -920,10 +1070,12 @@ class Random_XorShift1024 { template class Random_XorShift1024_Pool { private: + using execution_space = typename DeviceType::execution_space; + typedef View locks_type; typedef View int_view_type; typedef View state_data_type; - int_view_type locks_; + locks_type locks_; state_data_type state_; int_view_type p_; int num_states_; @@ -939,11 +1091,8 @@ class Random_XorShift1024_Pool { inline Random_XorShift1024_Pool(uint64_t seed) { num_states_ = 0; -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE - init(seed, DeviceType::max_hardware_threads()); -#else - init(seed, DeviceType::impl_max_hardware_threads()); -#endif + + init(seed, execution_space().concurrency()); } KOKKOS_INLINE_FUNCTION @@ -965,12 +1114,12 @@ class Random_XorShift1024_Pool { inline void init(uint64_t seed, int num_states) { if (seed == 0) seed = uint64_t(1318319); num_states_ = num_states; - locks_ = int_view_type("Kokkos::Random_XorShift1024::locks", num_states_); + locks_ = locks_type("Kokkos::Random_XorShift1024::locks", num_states_); state_ = state_data_type("Kokkos::Random_XorShift1024::state", num_states_); p_ = int_view_type("Kokkos::Random_XorShift1024::p", num_states_); typename state_data_type::HostMirror h_state = create_mirror_view(state_); - typename int_view_type::HostMirror h_lock = create_mirror_view(locks_); + typename locks_type::HostMirror h_lock = create_mirror_view(locks_); typename int_view_type::HostMirror h_p = create_mirror_view(p_); // Execute on the HostMirror's default execution space. @@ -997,11 +1146,8 @@ class Random_XorShift1024_Pool { KOKKOS_INLINE_FUNCTION Random_XorShift1024 get_state() const { -#ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const int i = DeviceType::hardware_thread_id(); -#else - const int i = DeviceType::impl_hardware_thread_id(); -#endif + const int i = + Impl::Random_UniqueIndex::get_state_idx(locks_); return Random_XorShift1024(state_, p_(i), i); }; @@ -1014,482 +1160,11 @@ class Random_XorShift1024_Pool { KOKKOS_INLINE_FUNCTION void free_state(const Random_XorShift1024& state) const { for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; - p_(state.state_idx_) = state.p_; + p_(state.state_idx_) = state.p_; + locks_(state.state_idx_) = 0; } }; -#if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDACC__) - -template <> -class Random_XorShift1024 { - private: - int p_; - const int state_idx_; - uint64_t* state_; - const int stride_; - friend class Random_XorShift1024_Pool; - - public: - typedef Kokkos::Cuda device_type; - typedef Random_XorShift1024_Pool pool_type; - - enum { MAX_URAND = 0xffffffffU }; - enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; - enum { MAX_RAND = static_cast(0xffffffffU / 2) }; - enum { MAX_RAND64 = static_cast(0xffffffffffffffffULL / 2 - 1) }; - - KOKKOS_INLINE_FUNCTION - Random_XorShift1024(const typename pool_type::state_data_type& state, int p, - int state_idx = 0) - : p_(p), - state_idx_(state_idx), - state_(&state(state_idx, 0)), - stride_(state.stride_1()) {} - - KOKKOS_INLINE_FUNCTION - uint32_t urand() { - uint64_t state_0 = state_[p_ * stride_]; - uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - uint64_t tmp = - (state_[p_ * stride_] = state_0 ^ state_1) * 1181783497276652981ULL; - tmp = tmp >> 16; - return static_cast(tmp & MAX_URAND); - } - - KOKKOS_INLINE_FUNCTION - uint64_t urand64() { - uint64_t state_0 = state_[p_ * stride_]; - uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - return ((state_[p_ * stride_] = state_0 ^ state_1) * - 1181783497276652981LL) - - 1; - } - - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& range) { - const uint32_t max_val = (MAX_URAND / range) * range; - uint32_t tmp = urand(); - while (tmp >= max_val) urand(); - return tmp % range; - } - - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& start, const uint32_t& end) { - return urand(end - start) + start; - } - - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& range) { - const uint64_t max_val = (MAX_URAND64 / range) * range; - uint64_t tmp = urand64(); - while (tmp >= max_val) urand64(); - return tmp % range; - } - - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& start, const uint64_t& end) { - return urand64(end - start) + start; - } - - KOKKOS_INLINE_FUNCTION - int rand() { return static_cast(urand() / 2); } - - KOKKOS_INLINE_FUNCTION - int rand(const int& range) { - const int max_val = (MAX_RAND / range) * range; - int tmp = rand(); - while (tmp >= max_val) rand(); - return tmp % range; - } - - KOKKOS_INLINE_FUNCTION - int rand(const int& start, const int& end) { - return rand(end - start) + start; - } - - KOKKOS_INLINE_FUNCTION - int64_t rand64() { return static_cast(urand64() / 2); } - - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& range) { - const int64_t max_val = (MAX_RAND64 / range) * range; - int64_t tmp = rand64(); - while (tmp >= max_val) rand64(); - return tmp % range; - } - - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& start, const int64_t& end) { - return rand64(end - start) + start; - } - - KOKKOS_INLINE_FUNCTION - float frand() { return 1.0f * urand64() / MAX_URAND64; } - - KOKKOS_INLINE_FUNCTION - float frand(const float& range) { return range * urand64() / MAX_URAND64; } - - KOKKOS_INLINE_FUNCTION - float frand(const float& start, const float& end) { - return frand(end - start) + start; - } - - KOKKOS_INLINE_FUNCTION - double drand() { return 1.0 * urand64() / MAX_URAND64; } - - KOKKOS_INLINE_FUNCTION - double drand(const double& range) { return range * urand64() / MAX_URAND64; } - - KOKKOS_INLINE_FUNCTION - double drand(const double& start, const double& end) { - return frand(end - start) + start; - } - - // Marsaglia polar method for drawing a standard normal distributed random - // number - KOKKOS_INLINE_FUNCTION - double normal() { - double S = 2.0; - double U; - while (S >= 1.0) { - U = 2.0 * drand() - 1.0; - const double V = 2.0 * drand() - 1.0; - S = U * U + V * V; - } - return U * std::sqrt(-2.0 * log(S) / S); - } - - KOKKOS_INLINE_FUNCTION - double normal(const double& mean, const double& std_dev = 1.0) { - return mean + normal() * std_dev; - } -}; - -template <> -inline Random_XorShift64_Pool::Random_XorShift64_Pool( - uint64_t seed) { - num_states_ = 0; - init(seed, 4 * 32768); -} - -template <> -KOKKOS_INLINE_FUNCTION Random_XorShift64 -Random_XorShift64_Pool::get_state() const { -#ifdef __CUDA_ARCH__ - const int i_offset = - (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; - int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * - blockDim.x * blockDim.y * blockDim.z + - i_offset) % - num_states_; - while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { - i += blockDim.x * blockDim.y * blockDim.z; - if (i >= num_states_) { - i = i_offset; - } - } - - return Random_XorShift64(state_(i), i); -#else - return Random_XorShift64(state_(0), 0); -#endif -} - -template <> -KOKKOS_INLINE_FUNCTION void Random_XorShift64_Pool::free_state( - const Random_XorShift64& state) const { - state_(state.state_idx_) = state.state_; -#ifdef __CUDA_ARCH__ - locks_(state.state_idx_) = 0; - return; -#endif -} - -template <> -inline Random_XorShift1024_Pool::Random_XorShift1024_Pool( - uint64_t seed) { - num_states_ = 0; - init(seed, 4 * 32768); -} - -template <> -KOKKOS_INLINE_FUNCTION Random_XorShift1024 -Random_XorShift1024_Pool::get_state() const { -#ifdef __CUDA_ARCH__ - const int i_offset = - (threadIdx.x * blockDim.y + threadIdx.y) * blockDim.z + threadIdx.z; - int i = (((blockIdx.x * gridDim.y + blockIdx.y) * gridDim.z + blockIdx.z) * - blockDim.x * blockDim.y * blockDim.z + - i_offset) % - num_states_; - while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { - i += blockDim.x * blockDim.y * blockDim.z; - if (i >= num_states_) { - i = i_offset; - } - } - - return Random_XorShift1024(state_, p_(i), i); -#else - return Random_XorShift1024(state_, p_(0), 0); -#endif -} - -template <> -KOKKOS_INLINE_FUNCTION void Random_XorShift1024_Pool::free_state( - const Random_XorShift1024& state) const { - for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; -#ifdef __CUDA_ARCH__ - locks_(state.state_idx_) = 0; - return; -#endif -} - -#endif - -#if defined(KOKKOS_ENABLE_ROCM) - -template <> -class Random_XorShift1024 { - private: - int p_; - const int state_idx_; - uint64_t* state_; - const int stride_; - friend class Random_XorShift1024_Pool; - - public: - typedef Kokkos::Experimental::ROCm device_type; - typedef Random_XorShift1024_Pool pool_type; - - enum { MAX_URAND = 0xffffffffU }; - enum { MAX_URAND64 = 0xffffffffffffffffULL - 1 }; - enum { MAX_RAND = static_cast(0xffffffffU / 2) }; - enum { MAX_RAND64 = static_cast(0xffffffffffffffffULL / 2 - 1) }; - - KOKKOS_INLINE_FUNCTION - Random_XorShift1024(const typename pool_type::state_data_type& state, int p, - int state_idx = 0) - : p_(p), - state_idx_(state_idx), - state_(&state(state_idx, 0)), - stride_(state.stride_1()) {} - - KOKKOS_INLINE_FUNCTION - uint32_t urand() { - uint64_t state_0 = state_[p_ * stride_]; - uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - uint64_t tmp = - (state_[p_ * stride_] = state_0 ^ state_1) * 1181783497276652981ULL; - tmp = tmp >> 16; - return static_cast(tmp & MAX_URAND); - } - - KOKKOS_INLINE_FUNCTION - uint64_t urand64() { - uint64_t state_0 = state_[p_ * stride_]; - uint64_t state_1 = state_[(p_ = (p_ + 1) & 15) * stride_]; - state_1 ^= state_1 << 31; - state_1 ^= state_1 >> 11; - state_0 ^= state_0 >> 30; - return ((state_[p_ * stride_] = state_0 ^ state_1) * - 1181783497276652981LL) - - 1; - } - - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& range) { - const uint32_t max_val = (MAX_URAND / range) * range; - uint32_t tmp = urand(); - while (tmp >= max_val) urand(); - return tmp % range; - } - - KOKKOS_INLINE_FUNCTION - uint32_t urand(const uint32_t& start, const uint32_t& end) { - return urand(end - start) + start; - } - - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& range) { - const uint64_t max_val = (MAX_URAND64 / range) * range; - uint64_t tmp = urand64(); - while (tmp >= max_val) urand64(); - return tmp % range; - } - - KOKKOS_INLINE_FUNCTION - uint64_t urand64(const uint64_t& start, const uint64_t& end) { - return urand64(end - start) + start; - } - - KOKKOS_INLINE_FUNCTION - int rand() { return static_cast(urand() / 2); } - - KOKKOS_INLINE_FUNCTION - int rand(const int& range) { - const int max_val = (MAX_RAND / range) * range; - int tmp = rand(); - while (tmp >= max_val) rand(); - return tmp % range; - } - - KOKKOS_INLINE_FUNCTION - int rand(const int& start, const int& end) { - return rand(end - start) + start; - } - - KOKKOS_INLINE_FUNCTION - int64_t rand64() { return static_cast(urand64() / 2); } - - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& range) { - const int64_t max_val = (MAX_RAND64 / range) * range; - int64_t tmp = rand64(); - while (tmp >= max_val) rand64(); - return tmp % range; - } - - KOKKOS_INLINE_FUNCTION - int64_t rand64(const int64_t& start, const int64_t& end) { - return rand64(end - start) + start; - } - - KOKKOS_INLINE_FUNCTION - float frand() { return 1.0f * urand64() / MAX_URAND64; } - - KOKKOS_INLINE_FUNCTION - float frand(const float& range) { return range * urand64() / MAX_URAND64; } - - KOKKOS_INLINE_FUNCTION - float frand(const float& start, const float& end) { - return frand(end - start) + start; - } - - KOKKOS_INLINE_FUNCTION - double drand() { return 1.0 * urand64() / MAX_URAND64; } - - KOKKOS_INLINE_FUNCTION - double drand(const double& range) { return range * urand64() / MAX_URAND64; } - - KOKKOS_INLINE_FUNCTION - double drand(const double& start, const double& end) { - return frand(end - start) + start; - } - - // Marsaglia polar method for drawing a standard normal distributed random - // number - KOKKOS_INLINE_FUNCTION - double normal() { - double S = 2.0; - double U; - while (S >= 1.0) { - U = 2.0 * drand() - 1.0; - const double V = 2.0 * drand() - 1.0; - S = U * U + V * V; - } - return U * std::sqrt(-2.0 * log(S) / S); - } - - KOKKOS_INLINE_FUNCTION - double normal(const double& mean, const double& std_dev = 1.0) { - return mean + normal() * std_dev; - } -}; - -template <> -inline Random_XorShift64_Pool< - Kokkos::Experimental::ROCm>::Random_XorShift64_Pool(uint64_t seed) { - num_states_ = 0; - init(seed, 4 * 32768); -} - -template <> -KOKKOS_INLINE_FUNCTION Random_XorShift64 -Random_XorShift64_Pool::get_state() const { -#ifdef __HCC_ACCELERATOR__ - const int i_offset = - (threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z; - int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) * - blockDim_x * blockDim_y * blockDim_z + - i_offset) % - num_states_; - while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { - i += blockDim_x * blockDim_y * blockDim_z; - if (i >= num_states_) { - i = i_offset; - } - } - - return Random_XorShift64(state_(i), i); -#else - return Random_XorShift64(state_(0), 0); -#endif -} - -template <> -KOKKOS_INLINE_FUNCTION void -Random_XorShift64_Pool::free_state( - const Random_XorShift64& state) const { -#ifdef __HCC_ACCELERATOR__ - state_(state.state_idx_) = state.state_; - locks_(state.state_idx_) = 0; - return; -#endif -} - -template <> -inline Random_XorShift1024_Pool< - Kokkos::Experimental::ROCm>::Random_XorShift1024_Pool(uint64_t seed) { - num_states_ = 0; - init(seed, 4 * 32768); -} - -template <> -KOKKOS_INLINE_FUNCTION Random_XorShift1024 -Random_XorShift1024_Pool::get_state() const { -#ifdef __HCC_ACCELERATOR__ - const int i_offset = - (threadIdx_x * blockDim_y + threadIdx_y) * blockDim_z + threadIdx_z; - int i = (((blockIdx_x * gridDim_y + blockIdx_y) * gridDim_z + blockIdx_z) * - blockDim_x * blockDim_y * blockDim_z + - i_offset) % - num_states_; - while (Kokkos::atomic_compare_exchange(&locks_(i), 0, 1)) { - i += blockDim_x * blockDim_y * blockDim_z; - if (i >= num_states_) { - i = i_offset; - } - } - - return Random_XorShift1024(state_, p_(i), i); -#else - return Random_XorShift1024(state_, p_(0), 0); -#endif -} - -template <> -KOKKOS_INLINE_FUNCTION void -Random_XorShift1024_Pool::free_state( - const Random_XorShift1024& state) const { -#ifdef __HCC_ACCELERATOR__ - for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i]; - locks_(state.state_idx_) = 0; - return; -#endif -} - -#endif - namespace Impl { template 0) - parallel_for((LDA + 127) / 128, + parallel_for("Kokkos::fill_random", (LDA + 127) / 128, Impl::fill_random_functor_range( a, g, range)); @@ -2055,7 +1730,7 @@ void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type end) { int64_t LDA = a.extent(0); if (LDA > 0) - parallel_for((LDA + 127) / 128, + parallel_for("Kokkos::fill_random", (LDA + 127) / 128, Impl::fill_random_functor_begin_end( a, g, begin, end)); diff --git a/algorithms/src/Kokkos_Sort.hpp b/algorithms/src/Kokkos_Sort.hpp index b7a988361f5..1c79a505bb9 100644 --- a/algorithms/src/Kokkos_Sort.hpp +++ b/algorithms/src/Kokkos_Sort.hpp @@ -201,7 +201,7 @@ class BinSort { bool sort_within_bins; public: - BinSort() {} + BinSort() = default; //---------------------------------------- // Constructor: takes the keys, the binning_operator and optionally whether to @@ -327,7 +327,7 @@ class BinSort { Kokkos::RangePolicy(0, len), functor); } - Kokkos::fence(); + execution_space().fence(); } template @@ -349,14 +349,14 @@ class BinSort { public: KOKKOS_INLINE_FUNCTION - void operator()(const bin_count_tag& tag, const int& i) const { + void operator()(const bin_count_tag& /*tag*/, const int i) const { const int j = range_begin + i; bin_count_atomic(bin_op.bin(keys, j))++; } KOKKOS_INLINE_FUNCTION - void operator()(const bin_offset_tag& tag, const int& i, value_type& offset, - const bool& final) const { + void operator()(const bin_offset_tag& /*tag*/, const int i, + value_type& offset, const bool& final) const { if (final) { bin_offsets(i) = offset; } @@ -364,7 +364,7 @@ class BinSort { } KOKKOS_INLINE_FUNCTION - void operator()(const bin_binning_tag& tag, const int& i) const { + void operator()(const bin_binning_tag& /*tag*/, const int i) const { const int j = range_begin + i; const int bin = bin_op.bin(keys, j); const int count = bin_count_atomic(bin)++; @@ -373,7 +373,7 @@ class BinSort { } KOKKOS_INLINE_FUNCTION - void operator()(const bin_sort_bins_tag& tag, const int& i) const { + void operator()(const bin_sort_bins_tag& /*tag*/, const int i) const { auto bin_size = bin_count_const(i); if (bin_size <= 1) return; int upper_bound = bin_offsets(i) + bin_size; @@ -381,7 +381,7 @@ class BinSort { while (!sorted) { sorted = true; int old_idx = sort_order(bin_offsets(i)); - int new_idx; + int new_idx = 0; for (int k = bin_offsets(i) + 1; k < upper_bound; k++) { new_idx = sort_order(k); @@ -446,7 +446,7 @@ struct BinOp3D { typename KeyViewType::non_const_value_type range_[3]; typename KeyViewType::non_const_value_type min_[3]; - BinOp3D() {} + BinOp3D() = default; BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], typename KeyViewType::const_value_type max[]) { diff --git a/algorithms/unit_tests/CMakeLists.txt b/algorithms/unit_tests/CMakeLists.txt index 6fb08ce2edb..e3563a8b98b 100644 --- a/algorithms/unit_tests/CMakeLists.txt +++ b/algorithms/unit_tests/CMakeLists.txt @@ -20,16 +20,38 @@ KOKKOS_ADD_TEST_LIBRARY( HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc ) -KOKKOS_TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0") +# WORKAROUND FOR HIPCC +IF(Kokkos_ENABLE_HIP) + TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0 --amdgpu-target=gfx906") +ELSE() + TARGET_COMPILE_DEFINITIONS(kokkosalgorithms_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0") +ENDIF() + +TARGET_COMPILE_FEATURES(kokkosalgorithms_gtest PUBLIC cxx_std_11) SET(SOURCES - UnitTestMain.cpp - TestCuda.cpp - ) + UnitTestMain.cpp +) IF(Kokkos_ENABLE_OPENMP) LIST( APPEND SOURCES TestOpenMP.cpp + TestOpenMP_Sort1D.cpp + TestOpenMP_Sort3D.cpp + TestOpenMP_SortDynamicView.cpp + TestOpenMP_Random.cpp + ) +ENDIF() + +IF(Kokkos_ENABLE_HIP) + LIST( APPEND SOURCES + TestHIP.cpp + ) +ENDIF() + +IF(Kokkos_ENABLE_CUDA) + LIST( APPEND SOURCES + TestCuda.cpp ) ENDIF() diff --git a/algorithms/unit_tests/Makefile b/algorithms/unit_tests/Makefile index 3c862d03dc9..4a192b08ec8 100644 --- a/algorithms/unit_tests/Makefile +++ b/algorithms/unit_tests/Makefile @@ -44,7 +44,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o + OBJ_OPENMP = TestOpenMP.o TestOpenMP_Random.o TestOpenMP_Sort1D.o TestOpenMP_Sort3D.o TestOpenMP_SortDynamicView.o UnitTestMain.o gtest-all.o TARGETS += KokkosAlgorithms_UnitTest_OpenMP TEST_TARGETS += test-openmp endif diff --git a/algorithms/unit_tests/TestCuda.cpp b/algorithms/unit_tests/TestCuda.cpp index ab727b0326d..86cee61f64f 100644 --- a/algorithms/unit_tests/TestCuda.cpp +++ b/algorithms/unit_tests/TestCuda.cpp @@ -59,11 +59,15 @@ namespace Test { void cuda_test_random_xorshift64(int num_draws) { - Impl::test_random >(num_draws); + Impl::test_random>(num_draws); + Impl::test_random>>(num_draws); } void cuda_test_random_xorshift1024(int num_draws) { - Impl::test_random >(num_draws); + Impl::test_random>(num_draws); + Impl::test_random>>(num_draws); } #define CUDA_RANDOM_XORSHIFT64(num_draws) \ diff --git a/algorithms/unit_tests/TestHIP.cpp b/algorithms/unit_tests/TestHIP.cpp new file mode 100644 index 00000000000..5e5ccb6a2eb --- /dev/null +++ b/algorithms/unit_tests/TestHIP.cpp @@ -0,0 +1,83 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#ifdef KOKKOS_ENABLE_HIP + +#include +#include +#include + +#include + +#include + +#include +#include + +namespace Test { + +void hip_test_random_xorshift64(size_t num_draws) { + Impl::test_random>( + num_draws); + Impl::test_random>>(num_draws); +} + +void hip_test_random_xorshift1024(size_t num_draws) { + Impl::test_random< + Kokkos::Random_XorShift1024_Pool>(num_draws); + Impl::test_random>>(num_draws); +} + +TEST(hip, Random_XorShift64) { hip_test_random_xorshift64(132141141); } +TEST(hip, Random_XorShift1024_0) { hip_test_random_xorshift1024(52428813); } +TEST(hip, SortUnsigned) { + Impl::test_sort(171); +} +} // namespace Test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTHIP_PREVENT_LINK_ERROR() {} +#endif /* #ifdef KOKKOS_ENABLE_HIP */ diff --git a/algorithms/unit_tests/TestOpenMP.cpp b/algorithms/unit_tests/TestOpenMP.cpp index 3a9e306014b..5ded3ce3906 100644 --- a/algorithms/unit_tests/TestOpenMP.cpp +++ b/algorithms/unit_tests/TestOpenMP.cpp @@ -55,30 +55,8 @@ namespace Test { -#define OPENMP_RANDOM_XORSHIFT64(num_draws) \ - TEST(openmp, Random_XorShift64) { \ - Impl::test_random >( \ - num_draws); \ - } +TEST(openmp, SortIssue1160) { Impl::test_issue_1160_sort(); } -#define OPENMP_RANDOM_XORSHIFT1024(num_draws) \ - TEST(openmp, Random_XorShift1024) { \ - Impl::test_random >( \ - num_draws); \ - } - -#define OPENMP_SORT_UNSIGNED(size) \ - TEST(openmp, SortUnsigned) { \ - Impl::test_sort(size); \ - } - -OPENMP_RANDOM_XORSHIFT64(10240000) -OPENMP_RANDOM_XORSHIFT1024(10130144) -OPENMP_SORT_UNSIGNED(171) - -#undef OPENMP_RANDOM_XORSHIFT64 -#undef OPENMP_RANDOM_XORSHIFT1024 -#undef OPENMP_SORT_UNSIGNED } // namespace Test #else void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} diff --git a/algorithms/unit_tests/TestOpenMP_Random.cpp b/algorithms/unit_tests/TestOpenMP_Random.cpp new file mode 100644 index 00000000000..1ca8e0a828f --- /dev/null +++ b/algorithms/unit_tests/TestOpenMP_Random.cpp @@ -0,0 +1,77 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#ifdef KOKKOS_ENABLE_OPENMP + +#include +#include + +//---------------------------------------------------------------------------- +#include +#include + +namespace Test { + +#define OPENMP_RANDOM_XORSHIFT64(num_draws) \ + TEST(openmp, Random_XorShift64) { \ + Impl::test_random >( \ + num_draws); \ + } + +#define OPENMP_RANDOM_XORSHIFT1024(num_draws) \ + TEST(openmp, Random_XorShift1024) { \ + Impl::test_random >( \ + num_draws); \ + } + +OPENMP_RANDOM_XORSHIFT64(10240000) +OPENMP_RANDOM_XORSHIFT1024(10130144) + +#undef OPENMP_RANDOM_XORSHIFT64 +#undef OPENMP_RANDOM_XORSHIFT1024 +} // namespace Test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} +#endif diff --git a/algorithms/unit_tests/TestOpenMP_Sort1D.cpp b/algorithms/unit_tests/TestOpenMP_Sort1D.cpp new file mode 100644 index 00000000000..a9b2010ad02 --- /dev/null +++ b/algorithms/unit_tests/TestOpenMP_Sort1D.cpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#ifdef KOKKOS_ENABLE_OPENMP + +#include +#include + +//---------------------------------------------------------------------------- +#include +#include +#include + +namespace Test { + +TEST(openmp, SortUnsigned1D) { + Impl::test_1D_sort(171); +} + +} // namespace Test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} +#endif diff --git a/algorithms/unit_tests/TestOpenMP_Sort3D.cpp b/algorithms/unit_tests/TestOpenMP_Sort3D.cpp new file mode 100644 index 00000000000..127d911d7ca --- /dev/null +++ b/algorithms/unit_tests/TestOpenMP_Sort3D.cpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#ifdef KOKKOS_ENABLE_OPENMP + +#include +#include + +//---------------------------------------------------------------------------- +#include +#include +#include + +namespace Test { + +TEST(openmp, SortUnsigned3D) { + Impl::test_3D_sort(171); +} + +} // namespace Test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} +#endif diff --git a/algorithms/unit_tests/TestOpenMP_SortDynamicView.cpp b/algorithms/unit_tests/TestOpenMP_SortDynamicView.cpp new file mode 100644 index 00000000000..3dc88540443 --- /dev/null +++ b/algorithms/unit_tests/TestOpenMP_SortDynamicView.cpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#ifdef KOKKOS_ENABLE_OPENMP + +#include +#include + +//---------------------------------------------------------------------------- +#include +#include +#include + +namespace Test { + +TEST(openmp, SortUnsignedDynamicView) { + Impl::test_dynamic_view_sort(171); +} + +} // namespace Test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} +#endif diff --git a/algorithms/unit_tests/TestRandom.hpp b/algorithms/unit_tests/TestRandom.hpp index bc55ebfad35..10a496242b6 100644 --- a/algorithms/unit_tests/TestRandom.hpp +++ b/algorithms/unit_tests/TestRandom.hpp @@ -140,7 +140,7 @@ struct test_random_functor { density_3d(d3d) {} KOKKOS_INLINE_FUNCTION - void operator()(int i, RandomProperties& prop) const { + void operator()(int /*i*/, RandomProperties& prop) const { using Kokkos::atomic_fetch_add; rnd_type rand_gen = rand_pool.get_state(); diff --git a/algorithms/unit_tests/TestSort.hpp b/algorithms/unit_tests/TestSort.hpp index 310a93c93d1..b6ff91c25fa 100644 --- a/algorithms/unit_tests/TestSort.hpp +++ b/algorithms/unit_tests/TestSort.hpp @@ -130,7 +130,7 @@ struct sum3D { }; template -void test_1D_sort(unsigned int n, bool force_kokkos) { +void test_1D_sort_impl(unsigned int n, bool force_kokkos) { typedef Kokkos::View KeyViewType; KeyViewType keys("Keys", n); @@ -165,7 +165,7 @@ void test_1D_sort(unsigned int n, bool force_kokkos) { } template -void test_3D_sort(unsigned int n) { +void test_3D_sort_impl(unsigned int n) { typedef Kokkos::View KeyViewType; KeyViewType keys("Keys", n * n * n); @@ -214,7 +214,7 @@ void test_3D_sort(unsigned int n) { //---------------------------------------------------------------------------- template -void test_dynamic_view_sort(unsigned int n) { +void test_dynamic_view_sort_impl(unsigned int n) { typedef Kokkos::Experimental::DynamicView KeyDynamicViewType; typedef Kokkos::View KeyViewType; @@ -278,7 +278,7 @@ void test_dynamic_view_sort(unsigned int n) { //---------------------------------------------------------------------------- template -void test_issue_1160() { +void test_issue_1160_impl() { Kokkos::View element_("element", 10); Kokkos::View x_("x", 10); Kokkos::View v_("y", 10); @@ -345,17 +345,34 @@ void test_issue_1160() { //---------------------------------------------------------------------------- +template +void test_1D_sort(unsigned int N) { + test_1D_sort_impl(N * N * N, true); + test_1D_sort_impl(N * N * N, false); +} + +template +void test_3D_sort(unsigned int N) { + test_3D_sort_impl(N); +} + +template +void test_dynamic_view_sort(unsigned int N) { + test_dynamic_view_sort_impl(N * N); +} + +template +void test_issue_1160_sort() { + test_issue_1160_impl(); +} + template void test_sort(unsigned int N) { - test_1D_sort(N * N * N, true); - test_1D_sort(N * N * N, false); -#if !defined(KOKKOS_ENABLE_ROCM) + test_1D_sort(N); test_3D_sort(N); - test_dynamic_view_sort(N * N); -#endif - test_issue_1160(); + test_dynamic_view_sort(N); + test_issue_1160_sort(); } - } // namespace Impl } // namespace Test #endif /* KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP */ diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000000..8f139ba6ab1 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,10 @@ +image: + - Visual Studio 2019 +clone_folder: c:\projects\source +build_script: +- cmd: >- + mkdir build && + cd build && + cmake c:\projects\source -DKokkos_ENABLE_TESTS=ON -DKokkos_ENABLE_LIBDL=OFF -DKokkos_ENABLE_PROFILING=OFF && + cmake --build . --target install && + ctest -C Debug -V diff --git a/benchmarks/gups/gups-kokkos.cc b/benchmarks/gups/gups-kokkos.cc index 9ac59be4a6b..36fc36925b9 100644 --- a/benchmarks/gups/gups-kokkos.cc +++ b/benchmarks/gups/gups-kokkos.cc @@ -61,7 +61,7 @@ typedef int GUPSIndex; double now() { struct timeval now; - gettimeofday(&now, NULL); + gettimeofday(&now, nullptr); return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6); } diff --git a/benchmarks/policy_performance/script_sample_usage.sh b/benchmarks/policy_performance/script_sample_usage.sh index f4bfb87f8fe..1c2db56648c 100755 --- a/benchmarks/policy_performance/script_sample_usage.sh +++ b/benchmarks/policy_performance/script_sample_usage.sh @@ -2,7 +2,7 @@ # Sample script for benchmarking policy performance -# Suggested enviroment variables to export prior to executing script: +# Suggested environment variables to export prior to executing script: # KNL: # OMP_NUM_THREADS=256 KMP_AFFINITY=compact # Power: diff --git a/benchmarks/stream/stream-kokkos.cc b/benchmarks/stream/stream-kokkos.cc index 6ce789dd820..8d604079d48 100644 --- a/benchmarks/stream/stream-kokkos.cc +++ b/benchmarks/stream/stream-kokkos.cc @@ -64,7 +64,7 @@ typedef int StreamIndex; double now() { struct timeval now; - gettimeofday(&now, NULL); + gettimeofday(&now, nullptr); return (double) now.tv_sec + ((double) now.tv_usec * 1.0e-6); } diff --git a/bin/hpcbind b/bin/hpcbind index b185a928217..6af091a7d8b 100755 --- a/bin/hpcbind +++ b/bin/hpcbind @@ -383,7 +383,7 @@ fi # Check unknown arguments ################################################################################ if [[ ${#UNKNOWN_ARGS[*]} > 0 ]]; then - echo "HPCBIND Uknown options: ${UNKNOWN_ARGS[*]}" > >(tee -a ${HPCBIND_LOG}) + echo "HPCBIND Unknown options: ${UNKNOWN_ARGS[*]}" > >(tee -a ${HPCBIND_LOG}) exit 1 fi diff --git a/cm_generate_makefile.bash b/cm_generate_makefile.bash deleted file mode 100755 index fd7cfe2d325..00000000000 --- a/cm_generate_makefile.bash +++ /dev/null @@ -1,339 +0,0 @@ -#!/bin/bash - -update_kokkos_devices() { - SEARCH_TEXT="*$1*" - if [[ $KOKKOS_DEVICES == $SEARCH_TEXT ]]; then - echo kokkos devices already includes $SEARCH_TEXT - else - if [ "$KOKKOS_DEVICES" = "" ]; then - KOKKOS_DEVICES="$1" - echo reseting kokkos devices to $KOKKOS_DEVICES - else - KOKKOS_DEVICES="${KOKKOS_DEVICES},$1" - echo appending to kokkos devices $KOKKOS_DEVICES - fi - fi -} - -get_kokkos_device_list() { - KOKKOS_DEVICE_CMD= - PARSE_DEVICES_LST=$(echo $KOKKOS_DEVICES | tr "," "\n") - for DEVICE_ in $PARSE_DEVICES_LST - do - UC_DEVICE=$(echo $DEVICE_ | tr "[:lower:]" "[:upper:]") - KOKKOS_DEVICE_CMD="-DKokkos_ENABLE_${UC_DEVICE}=ON ${KOKKOS_DEVICE_CMD}" - done -} - -get_kokkos_arch_list() { - KOKKOS_ARCH_CMD= - PARSE_ARCH_LST=$(echo $KOKKOS_ARCH | tr "," "\n") - for ARCH_ in $PARSE_ARCH_LST - do - UC_ARCH=$(echo $ARCH_ | tr "[:lower:]" "[:upper:]") - KOKKOS_ARCH_CMD="-DKokkos_ARCH_${UC_ARCH}=ON ${KOKKOS_ARCH_CMD}" - done -} - -get_kokkos_cuda_option_list() { - echo parsing KOKKOS_CUDA_OPTIONS=$KOKKOS_CUDA_OPTIONS - KOKKOS_CUDA_OPTION_CMD= - PARSE_CUDA_LST=$(echo $KOKKOS_CUDA_OPTIONS | tr "," "\n") - for CUDA_ in $PARSE_CUDA_LST - do - CUDA_OPT_NAME= - if [ "${CUDA_}" == "enable_lambda" ]; then - CUDA_OPT_NAME=CUDA_LAMBDA - elif [ "${CUDA_}" == "rdc" ]; then - CUDA_OPT_NAME=CUDA_RELOCATABLE_DEVICE_CODE - elif [ "${CUDA_}" == "force_uvm" ]; then - CUDA_OPT_NAME=CUDA_UVM - elif [ "${CUDA_}" == "use_ldg" ]; then - CUDA_OPT_NAME=CUDA_LDG_INTRINSIC - else - echo "${CUDA_} is not a valid cuda options..." - fi - if [ "${CUDA_OPT_NAME}" != "" ]; then - KOKKOS_CUDA_OPTION_CMD="-DKokkos_ENABLE_${CUDA_OPT_NAME}=ON ${KOKKOS_CUDA_OPTION_CMD}" - fi - done -} - -get_kokkos_option_list() { - echo parsing KOKKOS_OPTIONS=$KOKKOS_OPTIONS - KOKKOS_OPTION_CMD= - PARSE_OPTIONS_LST=$(echo $KOKKOS_OPTIONS | tr "," "\n") - for OPT_ in $PARSE_OPTIONS_LST - do - UC_OPT_=$(echo $OPT_ | tr "[:lower:]" "[:upper:]") - if [[ "$UC_OPT_" == *DISABLE* ]]; then - FLIP_OPT_=${UC_OPT_/DISABLE/ENABLE} - KOKKOS_OPTION_CMD="-DKokkos_${FLIP_OPT_}=OFF ${KOKKOS_OPTION_CMD}" - elif [[ "$UC_OPT_" == *ENABLE* ]]; then - KOKKOS_OPTION_CMD="-DKokkos_${UC_OPT_}=ON ${KOKKOS_OPTION_CMD}" - else - KOKKOS_OPTION_CMD="-DKokkos_ENABLE_${UC_OPT_}=ON ${KOKKOS_OPTION_CMD}" - fi - done -} - -display_help_text() { - - echo "Kokkos configure options:" - echo "" - echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory." - echo "--prefix=/Install/Path: Path to install the Kokkos library." - echo "" - echo "--with-cuda[=/Path/To/Cuda]: Enable Cuda and set path to Cuda Toolkit." - echo "--with-openmp: Enable OpenMP backend." - echo "--with-pthread: Enable Pthreads backend." - echo "--with-serial: Enable Serial backend." - echo "--with-devices: Explicitly add a set of backends." - echo "" - echo "--arch=[OPT]: Set target architectures. Options are:" - echo " [AMD]" - echo " AMDAVX = AMD CPU" - echo " EPYC = AMD EPYC Zen-Core CPU" - echo " [ARM]" - echo " ARMv80 = ARMv8.0 Compatible CPU" - echo " ARMv81 = ARMv8.1 Compatible CPU" - echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" - echo " ARMv8-TX2 = ARMv8 Cavium ThunderX2 CPU" - echo " [IBM]" - echo " BGQ = IBM Blue Gene Q" - echo " Power7 = IBM POWER7 and POWER7+ CPUs" - echo " Power8 = IBM POWER8 CPUs" - echo " Power9 = IBM POWER9 CPUs" - echo " [Intel]" - echo " WSM = Intel Westmere CPUs" - echo " SNB = Intel Sandy/Ivy Bridge CPUs" - echo " HSW = Intel Haswell CPUs" - echo " BDW = Intel Broadwell Xeon E-class CPUs" - echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" - echo " [Intel Xeon Phi]" - echo " KNC = Intel Knights Corner Xeon Phi" - echo " KNL = Intel Knights Landing Xeon Phi" - echo " [NVIDIA]" - echo " Kepler30 = NVIDIA Kepler generation CC 3.0" - echo " Kepler32 = NVIDIA Kepler generation CC 3.2" - echo " Kepler35 = NVIDIA Kepler generation CC 3.5" - echo " Kepler37 = NVIDIA Kepler generation CC 3.7" - echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" - echo " Maxwell52 = NVIDIA Maxwell generation CC 5.2" - echo " Maxwell53 = NVIDIA Maxwell generation CC 5.3" - echo " Pascal60 = NVIDIA Pascal generation CC 6.0" - echo " Pascal61 = NVIDIA Pascal generation CC 6.1" - echo " Volta70 = NVIDIA Volta generation CC 7.0" - echo " Volta72 = NVIDIA Volta generation CC 7.2" - echo "" - echo "--compiler=/Path/To/Compiler Set the compiler." - echo "--debug,-dbg: Enable Debugging." - echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test" - echo " build. This will still set certain required" - echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp," - echo " --std=c++11, etc.)." - echo "--cxxstandard=[FLAGS] Overwrite KOKKOS_CXX_STANDARD for library build and test" - echo " c++11 (default), c++14, c++17, c++1y, c++1z, c++2a" - echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test" - echo " build. This will still set certain required" - echo " flags via KOKKOS_LDFLAGS (such as -fopenmp," - echo " -lpthread, etc.)." - echo "--with-gtest=/Path/To/Gtest: Set path to gtest. (Used in unit and performance" - echo " tests.)" - echo "--with-hwloc=/Path/To/Hwloc: Set path to hwloc library." - echo "--with-memkind=/Path/To/MemKind: Set path to memkind library." - echo "--with-options=[OPT]: Additional options to Kokkos:" - echo " compiler_warnings" - echo " aggressive_vectorization = add ivdep on loops" - echo " disable_profiling = do not compile with profiling hooks" - echo " " - echo "--with-cuda-options=[OPT]: Additional options to CUDA:" - echo " force_uvm, use_ldg, enable_lambda, rdc" - echo "--with-hpx-options=[OPT]: Additional options to HPX:" - echo " enable_async_dispatch" - echo "--gcc-toolchain=/Path/To/GccRoot: Set the gcc toolchain to use with clang (e.g. /usr)" - echo "--make-j=[NUM]: DEPRECATED: call make with appropriate" - echo " -j flag" - -} - -while [[ $# > 0 ]] -do - key="$1" - - case $key in - --kokkos-path*) - KOKKOS_PATH="${key#*=}" - ;; - --hpx-path*) - HPX_PATH="${key#*=}" - ;; - --prefix*) - PREFIX="${key#*=}" - ;; - --with-cuda) - update_kokkos_devices Cuda - CUDA_PATH_NVCC=$(command -v nvcc) - CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc} - ;; - # Catch this before '--with-cuda*' - --with-cuda-options*) - KOKKOS_CUDA_OPTIONS="${key#*=}" - ;; - --with-cuda*) - update_kokkos_devices Cuda - CUDA_PATH="${key#*=}" - ;; - --with-openmp) - update_kokkos_devices OpenMP - ;; - --with-pthread) - update_kokkos_devices Pthread - ;; - --with-serial) - update_kokkos_devices Serial - ;; - --with-hpx-options*) - KOKKOS_HPX_OPT="${key#*=}" - ;; - --with-hpx*) - update_kokkos_devices HPX - if [ -z "$HPX_PATH" ]; then - HPX_PATH="${key#*=}" - fi - ;; - --with-devices*) - DEVICES="${key#*=}" - PARSE_DEVICES=$(echo $DEVICES | tr "," "\n") - for DEVICE_ in $PARSE_DEVICES - do - update_kokkos_devices $DEVICE_ - done - ;; - --with-gtest*) - GTEST_PATH="${key#*=}" - ;; - --with-hwloc*) - HWLOC_PATH="${key#*=}" - ;; - --with-memkind*) - MEMKIND_PATH="${key#*=}" - ;; - --arch*) - KOKKOS_ARCH="${key#*=}" - ;; - --cxxflags*) - KOKKOS_CXXFLAGS="${key#*=}" - KOKKOS_CXXFLAGS=${KOKKOS_CXXFLAGS//,/ } - ;; - --cxxstandard*) - KOKKOS_CXX_STANDARD="${key#*=}" - ;; - --ldflags*) - KOKKOS_LDFLAGS="${key#*=}" - ;; - --debug|-dbg) - KOKKOS_DEBUG=yes - ;; - --make-j*) - echo "Warning: ${key} is deprecated" - echo "Call make with appropriate -j flag" - ;; - --compiler*) - COMPILER="${key#*=}" - CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l) - if [ ${CNUM} -gt 0 ]; then - echo "Invalid compiler by --compiler command: '${COMPILER}'" - exit - fi - if [[ ! -n ${COMPILER} ]]; then - echo "Empty compiler specified by --compiler command." - exit - fi - CNUM=$(command -v ${COMPILER} | grep ${COMPILER} | wc -l) - if [ ${CNUM} -eq 0 ]; then - echo "Invalid compiler by --compiler command: '${COMPILER}'" - exit - fi - # ... valid compiler, ensure absolute path set - WCOMPATH=$(command -v $COMPILER) - COMPDIR=$(dirname $WCOMPATH) - COMPNAME=$(basename $WCOMPATH) - COMPILER=${COMPDIR}/${COMPNAME} - ;; - --with-options*) - KOKKOS_OPTIONS="${key#*=}" - ;; - --gcc-toolchain*) - KOKKOS_GCC_TOOLCHAIN="${key#*=}" - ;; - --help) - display_help_text - exit 0 - ;; - *) - echo "warning: ignoring unknown option $key" - ;; - esac - - shift -done - - -if [ "$COMPILER" == "" ]; then - COMPILER_CMD= -else - COMPILER_CMD=-DCMAKE_CXX_COMPILER=$COMPILER -fi - -if [ "$KOKKOS_DEBUG" == "" ]; then - KOKKOS_DEBUG_CMD=-DCMAKE_BUILD_TYPE=RELEASE -else - KOKKOS_DEBUG_CMD=-DCMAKE_BUILD_TYPE=DEBUG -fi - -if [ ! -e ${KOKKOS_PATH}/CMakeLists.txt ]; then - if [ "${KOKKOS_PATH}" == "" ]; then - CM_SCRIPT=$0 - KOKKOS_PATH=`dirname $CM_SCRIPT` - if [ ! -e ${KOKKOS_PATH}/CMakeLists.txt ]; then - echo "${KOKKOS_PATH} repository appears to not be complete. please verify and try again" - exit 0 - fi - else - echo "KOKKOS_PATH does not appear to be set properly. please specify in location of CMakeLists.txt" - display_help_text - exit 0 - fi -fi - -get_kokkos_device_list -get_kokkos_option_list -get_kokkos_arch_list -get_kokkos_cuda_option_list - -## if HPX is enabled, we need to enforce cxx standard = 14 -if [[ ${KOKKOS_DEVICE_CMD} == *Kokkos_ENABLE_HPX* ]]; then - if [ "${KOKKOS_CXX_STANDARD}" == "" ] || [ ${#KOKKOS_CXX_STANDARD} -lt 14 ]; then - echo CXX Standard must be 14 or higher for HPX to work. - KOKKOS_CXX_STANDARD=14 - fi -fi - -if [ "$KOKKOS_CXX_STANDARD" == "" ]; then - STANDARD_CMD= -else - STANDARD_CMD=-DKokkos_CXX_STANDARD=${KOKKOS_CXX_STANDARD} -fi - -if [[ ${COMPILER} == *clang* ]]; then - gcc_path=$(which g++ | awk --field-separator='/bin/g++' '{printf $1}' ) - KOKKOS_CXXFLAGS="${KOKKOS_CXXFLAGS} --gcc-toolchain=${gcc_path}" - - if [ ! "${CUDA_PATH}" == "" ]; then - KOKKOS_CXXFLAGS="${KOKKOS_CXXFLAGS} --cuda-path=${CUDA_PATH}" - fi -fi - -echo cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=ON ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_PATH} -cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS//\"}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS//\"}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=ON ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_PATH} diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 8c51eab4d78..c0be9f56411 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -6,5 +6,4 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( Core core PS REQUIRED Containers containers PS OPTIONAL Algorithms algorithms PS OPTIONAL - Example example EX OPTIONAL ) diff --git a/cmake/KokkosConfigCommon.cmake.in b/cmake/KokkosConfigCommon.cmake.in index da9c61976c5..8c663d01c1e 100644 --- a/cmake/KokkosConfigCommon.cmake.in +++ b/cmake/KokkosConfigCommon.cmake.in @@ -73,6 +73,9 @@ function(kokkos_check) # use it to check that there are variables defined for all required # arguments. Success or failure messages will be displayed but we are # responsible for signaling failure and skip the build system generation. + if (KOKKOS_CHECK_RETURN_VALUE) + set(Kokkos_${arg}_FIND_QUIETLY ON) + endif() find_package_handle_standard_args("Kokkos_${arg}" DEFAULT_MSG ${KOKKOS_CHECK_${arg}}) if(NOT Kokkos_${arg}_FOUND) diff --git a/cmake/KokkosCore_config.h.in b/cmake/KokkosCore_config.h.in index 084afba8a80..1d0b58fe02d 100644 --- a/cmake/KokkosCore_config.h.in +++ b/cmake/KokkosCore_config.h.in @@ -5,11 +5,19 @@ #define KOKKOS_CORE_CONFIG_H #endif +// KOKKOS_VERSION % 100 is the patch level +// KOKKOS_VERSION / 100 % 100 is the minor version +// KOKKOS_VERSION / 10000 is the major version +#cmakedefine KOKKOS_VERSION @KOKKOS_VERSION@ + + /* Execution Spaces */ #cmakedefine KOKKOS_ENABLE_SERIAL #cmakedefine KOKKOS_ENABLE_OPENMP +#cmakedefine KOKKOS_ENABLE_OPENMPTARGET #cmakedefine KOKKOS_ENABLE_THREADS #cmakedefine KOKKOS_ENABLE_CUDA +#cmakedefine KOKKOS_ENABLE_HIP #cmakedefine KOKKOS_ENABLE_HPX #cmakedefine KOKKOS_ENABLE_MEMKIND #cmakedefine KOKKOS_ENABLE_LIBRT @@ -33,6 +41,7 @@ #cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA #cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR #cmakedefine KOKKOS_ENABLE_CUDA_LDG_INTRINSIC +#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE #cmakedefine KOKKOS_ENABLE_HPX_ASYNC_DISPATCH #cmakedefine KOKKOS_ENABLE_DEBUG #cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK diff --git a/cmake/KokkosTrilinosConfig.cmake.in b/cmake/KokkosTrilinosConfig.cmake.in new file mode 100644 index 00000000000..626ef5a8ebe --- /dev/null +++ b/cmake/KokkosTrilinosConfig.cmake.in @@ -0,0 +1,17 @@ +IF (NOT TARGET Kokkos::kokkos) + # Compute the installation prefix relative to this file. + get_filename_component(KOKKOS_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH) + get_filename_component(KOKKOS_IMPORT_PREFIX "${KOKKOS_IMPORT_PREFIX}" PATH) + get_filename_component(KOKKOS_IMPORT_PREFIX "${KOKKOS_IMPORT_PREFIX}" PATH) + get_filename_component(KOKKOS_IMPORT_PREFIX "${KOKKOS_IMPORT_PREFIX}" PATH) + if(KOKKOS_IMPORT_PREFIX STREQUAL "/") + set(KOKKOS_IMPORT_PREFIX "") + endif() + add_library(Kokkos::kokkos INTERFACE IMPORTED) + set_target_properties(Kokkos::kokkos PROPERTIES + INTERFACE_LINK_LIBRARIES "@Kokkos_LIBRARIES@;@KOKKOS_LINK_OPTIONS@" + INTERFACE_COMPILE_FEATURES "@KOKKOS_CXX_STANDARD_FEATURE@" + INTERFACE_COMPILE_OPTIONS "@KOKKOS_ALL_COMPILE_OPTIONS@" + INTERFACE_INCLUDE_DIRECTORIES "${KOKKOS_IMPORT_PREFIX}/include" + ) +ENDIF() diff --git a/cmake/Modules/FindTPLCUDA.cmake b/cmake/Modules/FindTPLCUDA.cmake index 36aefcdb44f..1b36ab819f0 100644 --- a/cmake/Modules/FindTPLCUDA.cmake +++ b/cmake/Modules/FindTPLCUDA.cmake @@ -1,8 +1,12 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang) + # Note: "stubs" suffix allows CMake to find the dummy + # libcuda.so provided by the NVIDIA CUDA Toolkit for + # cross-compiling CUDA on a host without a GPU. KOKKOS_FIND_IMPORTED(CUDA INTERFACE LIBRARIES cudart cuda - LIBRARY_PATHS ENV LD_LIBRARY_PATH ENV CUDA_PATH + LIBRARY_PATHS ENV LD_LIBRARY_PATH ENV CUDA_PATH /usr/local/cuda + LIBRARY_SUFFIXES lib lib64 lib/stubs lib64/stubs ALLOW_SYSTEM_PATH_FALLBACK ) ELSE() diff --git a/cmake/Modules/FindTPLPTHREAD.cmake b/cmake/Modules/FindTPLPTHREAD.cmake index b4b8c34122a..1d154e29aff 100644 --- a/cmake/Modules/FindTPLPTHREAD.cmake +++ b/cmake/Modules/FindTPLPTHREAD.cmake @@ -3,15 +3,18 @@ TRY_COMPILE(KOKKOS_HAS_PTHREAD_ARG ${KOKKOS_TOP_BUILD_DIR}/tpl_tests ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/pthread.cpp LINK_LIBRARIES -pthread - COMPILE_DEFINITIONS -pthread) + COMPILE_DEFINITIONS -pthread +) +# The test no longer requires C++11 +# if we did needed C++ standard support, then we should add option +# ${CMAKE_CXX${KOKKOS_CXX_STANDARD}_STANDARD_COMPILE_OPTION} INCLUDE(FindPackageHandleStandardArgs) FIND_PACKAGE_HANDLE_STANDARD_ARGS(PTHREAD DEFAULT_MSG KOKKOS_HAS_PTHREAD_ARG) - -KOKKOS_CREATE_IMPORTED_TPL(PTHREAD - INTERFACE #this is not a real library with a real location - COMPILE_OPTIONS -pthread - LINK_OPTIONS -pthread) - - - +#Only create the TPL if we succeed +IF (KOKKOS_HAS_PTHREAD_ARG) + KOKKOS_CREATE_IMPORTED_TPL(PTHREAD + INTERFACE #this is not a real library with a real location + COMPILE_OPTIONS -pthread + LINK_OPTIONS -pthread) +ENDIF() diff --git a/cmake/README.md b/cmake/README.md index 2ac8731586b..6d0cc2daf10 100644 --- a/cmake/README.md +++ b/cmake/README.md @@ -67,7 +67,7 @@ Note that all of these use `PUBLIC`! Almost every Kokkos flag is not private to ### Compiler Features and Compiler Options -Compiler options are flags like `-fopenmp` that do not need to be "resolved." +Compiler options are flags like `-fopenmp` that do not need to be "resolved." The flag is either on or off. Compiler features are more fine-grained and require conflicting requests to be resolved. Suppose I have @@ -81,7 +81,7 @@ add_library(B b.cpp) target_compile_features(B PUBLIC cxx_std_14) target_link_libraries(A B) ```` -I have requested two diferent features. +I have requested two different features. CMake understands the requests and knows that `cxx_std_11` is a subset of `cxx_std_14`. CMake then picks C++14 for library `B`. CMake would not have been able to do feature resolution if we had directly done: @@ -145,11 +145,11 @@ If Kokkos depends on, e.g. `hwloc` the downstream project will also need to link There are three stages in adding a new third-party library (TPL): * Finding: find the desired library on the system and verify the installation is correct * Importing: create a CMake target, if necessary, that is compatible with `target_link_libraries`. This is mostly relevant for TPLs not installed with CMake. -* Exporting: make the desired library visible to downstream projects +* Exporting: make the desired library visible to downstream projects TPLs are somewhat complicated by whether the library was installed with CMake or some other build system. If CMake, our lives are greatly simplified. We simply use `find_package` to locate the installed CMake project then call `target_link_libraries(kokkoscore PUBLIC/PRIVATE TPL)`. For libaries not installed with CMake, the process is a bit more complex. -It is up to the Kokkos developers to "convert" the library into a CMake target as if it had been installed as a valid modern CMake target with properties. +It is up to the Kokkos developers to "convert" the library into a CMake target as if it had been installed as a valid modern CMake target with properties. There are helper functions for simplifying the process of importing TPLs in Kokkos, but we walk through the process in detail to clearly illustrate the steps involved. #### TPL Search Order @@ -166,8 +166,9 @@ There are 3 possibilities that could be used: The following is the search order that Kokkos follows. Note: This differs from the default search order used by CMake `find_library` and `find_header`. CMake prefers default system paths over user-provided paths. For Kokkos (and package managers in general), it is better to prefer user-provided paths since this usually indicates a specific version we want. -1. `_ROOT` -1. `Kokkos__DIR` +1. `_ROOT` command line option +1. `_ROOT` environment variable +1. `Kokkos__DIR` command line option 1. Paths added by Kokkos CMake logic 1. Default system paths (if allowed) @@ -179,7 +180,7 @@ If you do not find the TPL where you expect it, you should error out rather than #### Finding TPLs If finding a TPL that is not a modern CMake project, refer to the `FindHWLOC.cmake` file in `cmake/Modules` for an example. -You will ususally need to verify expected headers with `find_path` +You will usually need to verify expected headers with `find_path` ```` find_path(TPL_INCLUDE_DIR mytpl.h PATHS "${KOKKOS_MYTPL_DIR}/include") ```` diff --git a/cmake/compile_tests/clang_omp.cpp b/cmake/compile_tests/clang_omp.cpp index ce3bbfb2623..60a5c522820 100644 --- a/cmake/compile_tests/clang_omp.cpp +++ b/cmake/compile_tests/clang_omp.cpp @@ -1,6 +1,6 @@ #include -int main(int argc, char** argv) { +int main(int, char**) { int thr = omp_get_num_threads(); if (thr > 0) return thr; diff --git a/cmake/compile_tests/pthread.cpp b/cmake/compile_tests/pthread.cpp index 3b13f7ba357..92310da0293 100644 --- a/cmake/compile_tests/pthread.cpp +++ b/cmake/compile_tests/pthread.cpp @@ -4,6 +4,10 @@ void* kokkos_test(void* args) { return args; } int main(void) { pthread_t thread; + /* Use NULL to avoid C++11. Some compilers + do not have C++11 by default. Forcing C++11 + in the compile tests can be done, but is unnecessary + */ pthread_create(&thread, NULL, kokkos_test, NULL); pthread_join(thread, NULL); return 0; diff --git a/cmake/fake_tribits.cmake b/cmake/fake_tribits.cmake index 26948d2cfbb..acee4a249d1 100644 --- a/cmake/fake_tribits.cmake +++ b/cmake/fake_tribits.cmake @@ -24,10 +24,6 @@ IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "") ENDIF() ENDMACRO() -MACRO(GLOBAL_RESET VARNAME) - SET(${VARNAME} "" CACHE INTERNAL "" FORCE) -ENDMACRO() - MACRO(GLOBAL_OVERWRITE VARNAME VALUE TYPE) SET(${VARNAME} ${VALUE} CACHE ${TYPE} "" FORCE) ENDMACRO() @@ -88,13 +84,9 @@ MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) ENDMACRO() -IF(NOT TARGET check) - ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) -ENDIF() - FUNCTION(KOKKOS_ADD_TEST) if (KOKKOS_HAS_TRILINOS) - CMAKE_PARSE_ARGUMENTS(TEST + CMAKE_PARSE_ARGUMENTS(TEST "" "EXE;NAME" "" @@ -108,22 +100,27 @@ FUNCTION(KOKKOS_ADD_TEST) TRIBITS_ADD_TEST( ${EXE_ROOT} NAME ${TEST_NAME} - ${ARGN} COMM serial mpi NUM_MPI_PROCS 1 ${TEST_UNPARSED_ARGUMENTS} ) else() - CMAKE_PARSE_ARGUMENTS(TEST + CMAKE_PARSE_ARGUMENTS(TEST "WILL_FAIL" "FAIL_REGULAR_EXPRESSION;PASS_REGULAR_EXPRESSION;EXE;NAME" "CATEGORIES;CMD_ARGS" ${ARGN}) + # To match Tribits, we should always be receiving + # the root names of exes/libs IF(TEST_EXE) - SET(EXE ${TEST_EXE}) + SET(EXE_ROOT ${TEST_EXE}) ELSE() - SET(EXE ${TEST_NAME}) + SET(EXE_ROOT ${TEST_NAME}) ENDIF() + # Prepend package name to the test name + # These should be the full target name + SET(TEST_NAME ${PACKAGE_NAME}_${TEST_NAME}) + SET(EXE ${PACKAGE_NAME}_${EXE_ROOT}) IF(WIN32) ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${EXE}${CMAKE_EXECUTABLE_SUFFIX} ${TEST_CMD_ARGS}) ELSE() @@ -160,7 +157,7 @@ FUNCTION(KOKKOS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME) if (KOKKOS_HAS_TRILINOS) TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES(${TPL_NAME} ${ARGN}) else() - CMAKE_PARSE_ARGUMENTS(PARSE + CMAKE_PARSE_ARGUMENTS(PARSE "" "" "REQUIRED_HEADERS;REQUIRED_LIBS_NAMES" @@ -214,13 +211,13 @@ ENDFUNCTION() FUNCTION(KOKKOS_TARGET_INCLUDE_DIRECTORIES TARGET) IF(KOKKOS_HAS_TRILINOS) - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) #don't trust tribits to do this correctly - but need to add package name TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN}) -ELSEIF(TARGET ${TARGET}) +ELSEIF(TARGET ${TARGET}) #the target actually exists - this means we are doing separate libs #or this a test library - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} ${ARGN}) ELSE() GET_PROPERTY(LIBS GLOBAL PROPERTY KOKKOS_LIBRARIES_NAMES) @@ -239,7 +236,7 @@ ELSE() SET(options INTERFACE) SET(oneValueArgs) SET(multiValueArgs) - CMAKE_PARSE_ARGUMENTS(PARSE + CMAKE_PARSE_ARGUMENTS(PARSE "INTERFACE" "" "" @@ -264,7 +261,7 @@ ELSE() SET(oneValueArgs) SET(multiValueArgs HEADERS SOURCES) - CMAKE_PARSE_ARGUMENTS(PARSE + CMAKE_PARSE_ARGUMENTS(PARSE "STATIC;SHARED" "" "HEADERS;SOURCES" @@ -277,10 +274,6 @@ ELSE() LIST(REMOVE_DUPLICATES PARSE_SOURCES) ENDIF() ADD_LIBRARY(${NAME} ${PARSE_SOURCES}) - target_link_libraries( - ${NAME} - PUBLIC kokkos - ) ENDIF() ENDFUNCTION() diff --git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake index c33247c9550..d73a3539818 100644 --- a/cmake/kokkos_arch.cmake +++ b/cmake/kokkos_arch.cmake @@ -9,52 +9,6 @@ FUNCTION(KOKKOS_ARCH_OPTION SUFFIX DEV_TYPE DESCRIPTION) SET(KOKKOS_ARCH_${SUFFIX} ${KOKKOS_ARCH_${SUFFIX}} PARENT_SCOPE) ENDFUNCTION() -FUNCTION(ARCH_FLAGS) - SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang GNU) - CMAKE_PARSE_ARGUMENTS( - PARSE - "LINK_ONLY;COMPILE_ONLY" - "" - "${COMPILERS}" - ${ARGN}) - - SET(COMPILER ${KOKKOS_CXX_COMPILER_ID}) - - SET(FLAGS) - SET(NEW_COMPILE_OPTIONS) - SET(NEW_XCOMPILER_OPTIONS) - SET(NEW_LINK_OPTIONS) - LIST(APPEND NEW_XCOMPILER_OPTIONS ${KOKKOS_XCOMPILER_OPTIONS}) - LIST(APPEND NEW_COMPILE_OPTIONS ${KOKKOS_COMPILE_OPTIONS}) - LIST(APPEND NEW_LINK_OPTIONS ${KOKKOS_LINK_OPTIONS}) - FOREACH(COMP ${COMPILERS}) - IF (COMPILER STREQUAL "${COMP}") - IF (PARSE_${COMPILER}) - IF (NOT "${PARSE_${COMPILER}}" STREQUAL "NO-VALUE-SPECIFIED") - SET(FLAGS ${PARSE_${COMPILER}}) - ENDIF() - ELSEIF(PARSE_DEFAULT) - SET(FLAGS ${PARSE_DEFAULT}) - ENDIF() - ENDIF() - ENDFOREACH() - - IF (NOT LINK_ONLY) - # The funky logic here is for future handling of argument deduplication - # If we naively pass multiple -Xcompiler flags to target_compile_options - # -Xcompiler will get deduplicated and break the build - IF ("-Xcompiler" IN_LIST FLAGS) - LIST(REMOVE_ITEM FLAGS "-Xcompiler") - GLOBAL_APPEND(KOKKOS_XCOMPILER_OPTIONS ${FLAGS}) - ELSE() - GLOBAL_APPEND(KOKKOS_COMPILE_OPTIONS ${FLAGS}) - ENDIF() - ENDIF() - - IF (NOT COMPILE_ONLY) - GLOBAL_APPEND(KOKKOS_LINK_OPTIONS ${FLAGS}) - ENDIF() -ENDFUNCTION() # Make sure devices and compiler ID are done KOKKOS_CFG_DEPENDS(ARCH COMPILER_ID) @@ -98,14 +52,15 @@ KOKKOS_ARCH_OPTION(VOLTA70 GPU "NVIDIA Volta generation CC 7.0") KOKKOS_ARCH_OPTION(VOLTA72 GPU "NVIDIA Volta generation CC 7.2") KOKKOS_ARCH_OPTION(TURING75 GPU "NVIDIA Turing generation CC 7.5") KOKKOS_ARCH_OPTION(EPYC HOST "AMD Epyc architecture") - +KOKKOS_ARCH_OPTION(VEGA900 GPU "AMD GPU MI25 GFX900") +KOKKOS_ARCH_OPTION(VEGA906 GPU "AMD GPU MI50/MI60 GFX906") IF (KOKKOS_ENABLE_CUDA) #Regardless of version, make sure we define the general architecture name IF (KOKKOS_ARCH_KEPLER30 OR KOKKOS_ARCH_KEPLER32 OR KOKKOS_ARCH_KEPLER35 OR KOKKOS_ARCH_KEPLER37) SET(KOKKOS_ARCH_KEPLER ON) ENDIF() - + #Regardless of version, make sure we define the general architecture name IF (KOKKOS_ARCH_MAXWELL50 OR KOKKOS_ARCH_MAXWELL52 OR KOKKOS_ARCH_MAXWELL53) SET(KOKKOS_ARCH_MAXWELL ON) @@ -126,13 +81,13 @@ ENDIF() IF(KOKKOS_ENABLE_COMPILER_WARNINGS) SET(COMMON_WARNINGS - "-Wall" "-Wshadow" "-pedantic" + "-Wall" "-Wunused-parameter" "-Wshadow" "-pedantic" "-Wsign-compare" "-Wtype-limits" "-Wuninitialized") SET(GNU_WARNINGS "-Wempty-body" "-Wclobbered" "-Wignored-qualifiers" ${COMMON_WARNINGS}) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( PGI NO-VALUE-SPECIFIED GNU ${GNU_WARNINGS} DEFAULT ${COMMON_WARNINGS} @@ -141,7 +96,8 @@ ENDIF() #------------------------------- KOKKOS_CUDA_OPTIONS --------------------------- -GLOBAL_RESET(KOKKOS_CUDA_OPTIONS) +#clear anything that might be in the cache +GLOBAL_SET(KOKKOS_CUDA_OPTIONS) # Construct the Makefile options IF (KOKKOS_ENABLE_CUDA_LAMBDA) IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) @@ -157,6 +113,7 @@ ENDIF() IF (KOKKOS_CXX_COMPILER_ID STREQUAL Clang) SET(CUDA_ARCH_FLAG "--cuda-gpu-arch") + SET(AMDGPU_ARCH_FLAG "--amdgpu-target") GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -x cuda) IF (KOKKOS_ENABLE_CUDA) SET(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND ON CACHE BOOL "enable CUDA Clang workarounds" FORCE) @@ -171,27 +128,13 @@ IF (KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS -lineinfo) ENDIF() UNSET(_UPPERCASE_CMAKE_BUILD_TYPE) - IF (KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER 9.0 OR KOKKOS_CXX_COMPILER_VERSION VERSION_EQUAL 9.0) - GLOBAL_APPEND(KOKKOS_CUDAFE_OPTIONS --diag_suppress=esa_on_defaulted_function_ignored) + IF (KOKKOS_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 9.0 AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 10.0) + GLOBAL_APPEND(KOKKOS_CUDAFE_OPTIONS --diag_suppress=esa_on_defaulted_function_ignored) ENDIF() ENDIF() -IF(KOKKOS_ENABLE_OPENMP) - IF (KOKKOS_CXX_COMPILER_ID STREQUAL AppleClang) - MESSAGE(FATAL_ERROR "Apple Clang does not support OpenMP. Use native Clang instead") - ENDIF() - ARCH_FLAGS( - Clang -fopenmp=libomp - PGI -mp - NVIDIA -Xcompiler -fopenmp - Cray NO-VALUE-SPECIFIED - XL -qsmp=omp - DEFAULT -fopenmp - ) -ENDIF() - IF (KOKKOS_ARCH_ARMV80) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Cray NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED DEFAULT -march=armv8-a @@ -199,7 +142,7 @@ IF (KOKKOS_ARCH_ARMV80) ENDIF() IF (KOKKOS_ARCH_ARMV81) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Cray NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED DEFAULT -march=armv8.1-a @@ -208,7 +151,7 @@ ENDIF() IF (KOKKOS_ARCH_ARMV8_THUNDERX) SET(KOKKOS_ARCH_ARMV80 ON) #Not a cache variable - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Cray NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED DEFAULT -march=armv8-a -mtune=thunderx @@ -217,7 +160,7 @@ ENDIF() IF (KOKKOS_ARCH_ARMV8_THUNDERX2) SET(KOKKOS_ARCH_ARMV81 ON) #Not a cache variable - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Cray NO-VALUE-SPECIFIED PGI NO-VALUE-SPECIFIED DEFAULT -mcpu=thunderx2t99 -mtune=thunderx2t99 @@ -225,7 +168,7 @@ IF (KOKKOS_ARCH_ARMV8_THUNDERX2) ENDIF() IF (KOKKOS_ARCH_EPYC) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Intel -mavx2 DEFAULT -march=znver1 -mtune=znver1 ) @@ -234,7 +177,7 @@ IF (KOKKOS_ARCH_EPYC) ENDIF() IF (KOKKOS_ARCH_WSM) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Intel -xSSE4.2 PGI -tp=nehalem Cray NO-VALUE-SPECIFIED @@ -245,7 +188,7 @@ ENDIF() IF (KOKKOS_ARCH_SNB OR KOKKOS_ARCH_AMDAVX) SET(KOKKOS_ARCH_AVX ON) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Intel -mavx PGI -tp=sandybridge Cray NO-VALUE-SPECIFIED @@ -255,7 +198,7 @@ ENDIF() IF (KOKKOS_ARCH_HSW) SET(KOKKOS_ARCH_AVX2 ON) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Intel -xCORE-AVX2 PGI -tp=haswell Cray NO-VALUE-SPECIFIED @@ -265,7 +208,7 @@ ENDIF() IF (KOKKOS_ARCH_BDW) SET(KOKKOS_ARCH_AVX2 ON) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Intel -xCORE-AVX2 PGI -tp=haswell Cray NO-VALUE-SPECIFIED @@ -275,7 +218,7 @@ ENDIF() IF (KOKKOS_ARCH_EPYC) SET(KOKKOS_ARCH_AMD_AVX2 ON) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Intel -mvax2 DEFAULT -march=znver1 -mtune=znver1 ) @@ -284,7 +227,7 @@ ENDIF() IF (KOKKOS_ARCH_KNL) #avx512-mic SET(KOKKOS_ARCH_AVX512MIC ON) #not a cache variable - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Intel -xMIC-AVX512 PGI NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED @@ -294,7 +237,7 @@ ENDIF() IF (KOKKOS_ARCH_KNC) SET(KOKKOS_USE_ISA_KNC ON) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( DEFAULT -mmic ) ENDIF() @@ -302,7 +245,7 @@ ENDIF() IF (KOKKOS_ARCH_SKX) #avx512-xeon SET(KOKKOS_ARCH_AVX512XEON ON) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Intel -xCORE-AVX512 PGI NO-VALUE-SPECIFIED Cray NO-VALUE-SPECIFIED @@ -319,7 +262,7 @@ IF (KOKKOS_ARCH_BDW OR KOKKOS_ARCH_SKX) ENDIF() IF (KOKKOS_ARCH_POWER7) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( PGI NO-VALUE-SPECIFIED DEFAULT -mcpu=power7 -mtune=power7 ) @@ -327,7 +270,7 @@ IF (KOKKOS_ARCH_POWER7) ENDIF() IF (KOKKOS_ARCH_POWER8) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( PGI NO-VALUE-SPECIFIED NVIDIA NO-VALUE-SPECIFIED DEFAULT -mcpu=power8 -mtune=power8 @@ -335,7 +278,7 @@ IF (KOKKOS_ARCH_POWER8) ENDIF() IF (KOKKOS_ARCH_POWER9) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( PGI NO-VALUE-SPECIFIED NVIDIA NO-VALUE-SPECIFIED DEFAULT -mcpu=power9 -mtune=power9 @@ -347,33 +290,50 @@ IF (KOKKOS_ARCH_POWER8 OR KOKKOS_ARCH_POWER9) ENDIF() IF (Kokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) - ARCH_FLAGS( + COMPILER_SPECIFIC_FLAGS( Clang -fcuda-rdc NVIDIA --relocatable-device-code=true ) ENDIF() +#Right now we cannot get the compiler ID when cross-compiling, so just check +#that HIP is enabled +IF (Kokkos_ENABLE_HIP) + IF (Kokkos_ENABLE_HIP_RELOCATABLE_DEVICE_CODE) + COMPILER_SPECIFIC_FLAGS( + DEFAULT -fgpu-rdc + ) + ELSE() + COMPILER_SPECIFIC_FLAGS( + DEFAULT -fno-gpu-rdc + ) + ENDIF() +ENDIF() + SET(CUDA_ARCH_ALREADY_SPECIFIED "") FUNCTION(CHECK_CUDA_ARCH ARCH FLAG) -IF(KOKKOS_ARCH_${ARCH}) - IF(CUDA_ARCH_ALREADY_SPECIFIED) - MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${CUDA_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.") - ENDIF() - SET(CUDA_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE) - IF (NOT KOKKOS_ENABLE_CUDA) - MESSAGE(WARNING "Given CUDA arch ${ARCH}, but Kokkos_ENABLE_CUDA is OFF. Option will be ignored.") - UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE) - ELSE() - GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") - IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) - GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") + IF(KOKKOS_ARCH_${ARCH}) + IF(CUDA_ARCH_ALREADY_SPECIFIED) + MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${CUDA_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.") + ENDIF() + SET(CUDA_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE) + IF (NOT KOKKOS_ENABLE_CUDA AND NOT KOKKOS_ENABLE_OPENMPTARGET) + MESSAGE(WARNING "Given CUDA arch ${ARCH}, but Kokkos_ENABLE_CUDA and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.") + UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE) + ELSE() + SET(KOKKOS_CUDA_ARCH_FLAG ${FLAG} PARENT_SCOPE) + GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") + IF(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${CUDA_ARCH_FLAG}=${FLAG}") + ENDIF() ENDIF() ENDIF() -ENDIF() ENDFUNCTION() +#These will define KOKKOS_CUDA_ARCH_FLAG +#to the corresponding flag name if ON CHECK_CUDA_ARCH(KEPLER30 sm_30) CHECK_CUDA_ARCH(KEPLER32 sm_32) CHECK_CUDA_ARCH(KEPLER35 sm_35) @@ -383,18 +343,76 @@ CHECK_CUDA_ARCH(MAXWELL52 sm_52) CHECK_CUDA_ARCH(MAXWELL53 sm_53) CHECK_CUDA_ARCH(PASCAL60 sm_60) CHECK_CUDA_ARCH(PASCAL61 sm_61) -CHECK_CUDA_ARCH(VOLTA70 sm_70) -CHECK_CUDA_ARCH(VOLTA72 sm_72) +CHECK_CUDA_ARCH(VOLTA70 sm_70) +CHECK_CUDA_ARCH(VOLTA72 sm_72) CHECK_CUDA_ARCH(TURING75 sm_75) +SET(AMDGPU_ARCH_ALREADY_SPECIFIED "") +FUNCTION(CHECK_AMDGPU_ARCH ARCH FLAG) + IF(KOKKOS_ARCH_${ARCH}) + IF(AMDGPU_ARCH_ALREADY_SPECIFIED) + MESSAGE(FATAL_ERROR "Multiple GPU architectures given! Already have ${AMDGPU_ARCH_ALREADY_SPECIFIED}, but trying to add ${ARCH}. If you are re-running CMake, try clearing the cache and running again.") + ENDIF() + SET(AMDGPU_ARCH_ALREADY_SPECIFIED ${ARCH} PARENT_SCOPE) + IF (NOT KOKKOS_ENABLE_HIP AND NOT KOKKOS_ENABLE_OPENMPTARGET) + MESSAGE(WARNING "Given HIP arch ${ARCH}, but Kokkos_ENABLE_AMDGPU and Kokkos_ENABLE_OPENMPTARGET are OFF. Option will be ignored.") + UNSET(KOKKOS_ARCH_${ARCH} PARENT_SCOPE) + ELSE() + SET(KOKKOS_AMDGPU_ARCH_FLAG ${FLAG} PARENT_SCOPE) + GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") + IF(KOKKOS_ENABLE_HIP) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS "${AMDGPU_ARCH_FLAG}=${FLAG}") + ENDIF() + ENDIF() + ENDIF() +ENDFUNCTION() + +#These will define KOKKOS_AMDGPU_ARCH_FLAG +#to the corresponding flag name if ON +CHECK_AMDGPU_ARCH(VEGA900 gfx900) # Radeon Instinct MI25 +CHECK_AMDGPU_ARCH(VEGA906 gfx906) # Radeon Instinct MI50 and MI60 + +IF (KOKKOS_ENABLE_OPENMPTARGET) + SET(CLANG_CUDA_ARCH ${KOKKOS_CUDA_ARCH_FLAG}) + IF (CLANG_CUDA_ARCH) + COMPILER_SPECIFIC_FLAGS( + Clang -Xopenmp-target -march=${CLANG_CUDA_ARCH} -fopenmp-targets=nvptx64-nvidia-cuda + XL -qtgtarch=${KOKKOS_CUDA_ARCH_FLAG} + ) + ENDIF() + SET(CLANG_AMDGPU_ARCH ${KOKKOS_AMDGPU_ARCH_FLAG}) + IF (CLANG_AMDGPU_ARCH) + COMPILER_SPECIFIC_FLAGS( + Clang -Xopenmp-target=amdgcn-amd-amdhsa -march=${CLANG_AMDGPU_ARCH} -fopenmp-targets=amdgcn-amd-amdhsa + ) + ENDIF() +ENDIF() + +IF(KOKKOS_ENABLE_CUDA AND NOT CUDA_ARCH_ALREADY_SPECIFIED) + MESSAGE(SEND_ERROR "CUDA enabled but no NVIDIA GPU architecture currently enabled. Please give one -DKokkos_ARCH_{..}=ON' to enable an NVIDIA GPU architecture.") +ENDIF() + #CMake verbose is kind of pointless #Let's just always print things MESSAGE(STATUS "Execution Spaces:") -IF(KOKKOS_ENABLE_CUDA) - MESSAGE(STATUS " Device Parallel: CUDA") -ELSE() - MESSAGE(STATUS " Device Parallel: NONE") + +FOREACH (_BACKEND CUDA OPENMPTARGET HIP) + IF(KOKKOS_ENABLE_${_BACKEND}) + IF(_DEVICE_PARALLEL) + MESSAGE(FATAL_ERROR "Multiple device parallel execution spaces are not allowed! " + "Trying to enable execution space ${_BACKEND}, " + "but execution space ${_DEVICE_PARALLEL} is already enabled. " + "Remove the CMakeCache.txt file and re-configure.") + ENDIF() + SET(_DEVICE_PARALLEL ${_BACKEND}) + ENDIF() +ENDFOREACH() +IF(NOT _DEVICE_PARALLEL) + SET(_DEVICE_PARALLEL "NONE") ENDIF() +MESSAGE(STATUS " Device Parallel: ${_DEVICE_PARALLEL}") +UNSET(_DEVICE_PARALLEL) + FOREACH (_BACKEND OPENMP PTHREAD HPX) IF(KOKKOS_ENABLE_${_BACKEND}) diff --git a/cmake/kokkos_check_env.cmake b/cmake/kokkos_check_env.cmake new file mode 100644 index 00000000000..a455a403b9d --- /dev/null +++ b/cmake/kokkos_check_env.cmake @@ -0,0 +1,12 @@ +SET(CRAYPE_VERSION $ENV{CRAYPE_VERSION}) +IF (CRAYPE_VERSION) + SET(KOKKOS_IS_CRAYPE TRUE) + SET(CRAYPE_LINK_TYPE $ENV{CRAYPE_LINK_TYPE}) + IF (CRAYPE_LINK_TYPE) + IF (NOT CRAYPE_LINK_TYPE STREQUAL "dynamic") + MESSAGE(WARNING "CRAYPE_LINK_TYPE is set to ${CRAYPE_LINK_TYPE}. Linking is likely to fail unless this is set to 'dynamic'") + ENDIF() + ELSE() + MESSAGE(WARNING "CRAYPE_LINK_TYPE is not set. Linking is likely to fail unless this is set to 'dynamic'") + ENDIF() +ENDIF() diff --git a/cmake/kokkos_compiler_id.cmake b/cmake/kokkos_compiler_id.cmake index d239c3b32e5..cd5e7c9e4e4 100644 --- a/cmake/kokkos_compiler_id.cmake +++ b/cmake/kokkos_compiler_id.cmake @@ -13,7 +13,7 @@ EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version STRING(REGEX REPLACE "^ +" "" - INTERNAL_HAVE_COMPILER_NVCC ${INTERNAL_HAVE_COMPILER_NVCC}) + INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}") IF(INTERNAL_HAVE_COMPILER_NVCC) @@ -31,16 +31,32 @@ IF(INTERNAL_HAVE_COMPILER_NVCC) SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE) ENDIF() -IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) - # SET nvcc's compiler version. +IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) + # The Cray compiler reports as Clang to most versions of CMake + EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version + COMMAND grep Cray + COMMAND wc -l + OUTPUT_VARIABLE INTERNAL_HAVE_CRAY_COMPILER + OUTPUT_STRIP_TRAILING_WHITESPACE) + IF (INTERNAL_HAVE_CRAY_COMPILER) #not actually Clang + SET(KOKKOS_CLANG_IS_CRAY TRUE) + ENDIF() +ENDIF() + +IF(KOKKOS_CXX_COMPILER_ID STREQUAL Cray OR KOKKOS_CLANG_IS_CRAY) + # SET Cray's compiler version. EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) - STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$" + STRING(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" TEMP_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION}) - SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE) + IF (KOKKOS_CLANG_IS_CRAY) + SET(KOKKOS_CLANG_CRAY_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION}) + ELSE() + SET(KOKKOS_CXX_COMPILER_VERSION ${TEMP_CXX_COMPILER_VERSION} CACHE STRING INTERNAL FORCE) + ENDIF() ENDIF() # Enforce the minimum compilers supported by Kokkos. diff --git a/cmake/kokkos_corner_cases.cmake b/cmake/kokkos_corner_cases.cmake index c03c385faf6..e30be3c841e 100644 --- a/cmake/kokkos_corner_cases.cmake +++ b/cmake/kokkos_corner_cases.cmake @@ -1,4 +1,4 @@ -IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP) +IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP AND NOT KOKKOS_CLANG_IS_CRAY) # The clang "version" doesn't actually tell you what runtimes and tools # were built into Clang. We should therefore make sure that libomp # was actually built into Clang. Otherwise the user will get nonsensical @@ -11,7 +11,7 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP) #also - this is easier to use than CMakeCheckCXXSourceCompiles TRY_COMPILE(CLANG_HAS_OMP ${KOKKOS_TOP_BUILD_DIR}/corner_cases - ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/clang_omp.cpp + ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/clang_omp.cpp COMPILE_DEFINITIONS -fopenmp=libomp LINK_LIBRARIES -fopenmp=libomp ) @@ -22,6 +22,30 @@ IF(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND KOKKOS_ENABLE_OPENMP) UNSET(CLANG_HAS_OMP CACHE) #make sure CMake always re-runs this ENDIF() +IF(KOKKOS_CXX_COMPILER_ID STREQUAL AppleClang AND KOKKOS_ENABLE_OPENMP) + # The clang "version" doesn't actually tell you what runtimes and tools + # were built into Clang. We should therefore make sure that libomp + # was actually built into Clang. Otherwise the user will get nonsensical + # errors when they try to build. + + #Try compile is the height of CMake nonsense + #I can't just give it compiler and link flags + #I have to hackily pretend that compiler flags are compiler definitions + #and that linker flags are libraries + #also - this is easier to use than CMakeCheckCXXSourceCompiles + TRY_COMPILE(APPLECLANG_HAS_OMP + ${KOKKOS_TOP_BUILD_DIR}/corner_cases + ${KOKKOS_SOURCE_DIR}/cmake/compile_tests/clang_omp.cpp + COMPILE_DEFINITIONS -Xpreprocessor -fopenmp + LINK_LIBRARIES -lomp + ) + IF (NOT APPLECLANG_HAS_OMP) + UNSET(APPLECLANG_HAS_OMP CACHE) #make sure CMake always re-runs this + MESSAGE(FATAL_ERROR "AppleClang failed OpenMP check. You have requested -DKokkos_ENABLE_OPENMP=ON, but the AppleClang compiler does not appear to have been built with OpenMP support") + ENDIF() + UNSET(APPLECLANG_HAS_OMP CACHE) #make sure CMake always re-runs this +ENDIF() + IF (KOKKOS_CXX_STANDARD STREQUAL 17) IF (KOKKOS_CXX_COMPILER_ID STREQUAL GNU AND KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7) diff --git a/cmake/kokkos_enable_devices.cmake b/cmake/kokkos_enable_devices.cmake index ff098766736..7b50cfe458a 100644 --- a/cmake/kokkos_enable_devices.cmake +++ b/cmake/kokkos_enable_devices.cmake @@ -31,6 +31,41 @@ ELSE() SET(OMP_DEFAULT OFF) ENDIF() KOKKOS_DEVICE_OPTION(OPENMP ${OMP_DEFAULT} HOST "Whether to build OpenMP backend") +IF(KOKKOS_ENABLE_OPENMP) + SET(ClangOpenMPFlag -fopenmp=libomp) + IF(KOKKOS_CLANG_IS_CRAY) + SET(ClangOpenMPFlag -fopenmp) + ENDIF() + COMPILER_SPECIFIC_FLAGS( + Clang ${ClangOpenMPFlag} + AppleClang -Xpreprocessor -fopenmp + PGI -mp + NVIDIA -Xcompiler -fopenmp + Cray NO-VALUE-SPECIFIED + XL -qsmp=omp + DEFAULT -fopenmp + ) + COMPILER_SPECIFIC_LIBS( + AppleClang -lomp + ) +ENDIF() + +KOKKOS_DEVICE_OPTION(OPENMPTARGET OFF DEVICE "Whether to build the OpenMP target backend") +IF (KOKKOS_ENABLE_OPENMPTARGET) + COMPILER_SPECIFIC_FLAGS( + Clang -fopenmp -fopenmp=libomp + XL -qsmp=omp -qoffload -qnoeh + DEFAULT -fopenmp + ) + COMPILER_SPECIFIC_DEFS( + XL KOKKOS_IBM_XL_OMP45_WORKAROUND + Clang KOKKOS_WORKAROUND_OPENMPTARGET_CLANG + ) +# Are there compilers which identify as Clang and need this library? +# COMPILER_SPECIFIC_LIBS( +# Clang -lopenmptarget +# ) +ENDIF() IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA) SET(CUDA_DEFAULT ON) @@ -59,3 +94,5 @@ ENDIF() KOKKOS_DEVICE_OPTION(SERIAL ${SERIAL_DEFAULT} HOST "Whether to build serial backend") KOKKOS_DEVICE_OPTION(HPX OFF HOST "Whether to build HPX backend (experimental)") + +KOKKOS_DEVICE_OPTION(HIP OFF DEVICE "Whether to build HIP backend") diff --git a/cmake/kokkos_enable_options.cmake b/cmake/kokkos_enable_options.cmake index c0e49482b66..4560c3df8fb 100644 --- a/cmake/kokkos_enable_options.cmake +++ b/cmake/kokkos_enable_options.cmake @@ -21,6 +21,7 @@ ENDFUNCTION() # Certain defaults will depend on knowing the enabled devices KOKKOS_CFG_DEPENDS(OPTIONS DEVICES) +KOKKOS_CFG_DEPENDS(OPTIONS COMPILER_ID) # Put a check in just in case people are using this option KOKKOS_DEPRECATED_LIST(OPTIONS ENABLE) @@ -28,8 +29,10 @@ KOKKOS_DEPRECATED_LIST(OPTIONS ENABLE) KOKKOS_ENABLE_OPTION(CUDA_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for CUDA") KOKKOS_ENABLE_OPTION(CUDA_UVM OFF "Whether to use unified memory (UM) for CUDA by default") KOKKOS_ENABLE_OPTION(CUDA_LDG_INTRINSIC OFF "Whether to use CUDA LDG intrinsics") +KOKKOS_ENABLE_OPTION(HIP_RELOCATABLE_DEVICE_CODE OFF "Whether to enable relocatable device code (RDC) for HIP") KOKKOS_ENABLE_OPTION(HPX_ASYNC_DISPATCH OFF "Whether HPX supports asynchronous dispatch") KOKKOS_ENABLE_OPTION(TESTS OFF "Whether to build the unit tests") +KOKKOS_ENABLE_OPTION(EXAMPLES OFF "Whether to build the examples") STRING(TOUPPER "${CMAKE_BUILD_TYPE}" UPPERCASE_CMAKE_BUILD_TYPE) IF(UPPERCASE_CMAKE_BUILD_TYPE STREQUAL "DEBUG") KOKKOS_ENABLE_OPTION(DEBUG ON "Whether to activate extra debug features - may increase compile times") @@ -51,12 +54,14 @@ IF (KOKKOS_ENABLE_CUDA) SET(KOKKOS_COMPILER_CUDA_VERSION "${KOKKOS_COMPILER_VERSION_MAJOR}${KOKKOS_COMPILER_VERSION_MINOR}") ENDIF() -IF (Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA AND DEFINED KOKKOS_COMPILER_CUDA_VERSION AND KOKKOS_COMPILER_CUDA_VERSION GREATER 70) - SET(LAMBDA_DEFAULT ON) +IF (Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA) + SET(CUDA_LAMBDA_DEFAULT ON) +ELSEIF (KOKKOS_ENABLE_CUDA AND (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)) + SET(CUDA_LAMBDA_DEFAULT ON) ELSE() - SET(LAMBDA_DEFAULT OFF) + SET(CUDA_LAMBDA_DEFAULT OFF) ENDIF() -KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${LAMBDA_DEFAULT} "Whether to activate experimental lambda features") +KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to activate experimental lambda features") IF (Trilinos_ENABLE_Kokkos) SET(COMPLEX_ALIGN_DEFAULT OFF) ELSE() @@ -64,7 +69,13 @@ ELSE() ENDIF() KOKKOS_ENABLE_OPTION(COMPLEX_ALIGN ${COMPLEX_ALIGN_DEFAULT} "Whether to align Kokkos::complex to 2*alignof(RealType)") -KOKKOS_ENABLE_OPTION(CUDA_CONSTEXPR OFF "Whether to activate experimental relaxed constexpr functions") + +IF (KOKKOS_ENABLE_CUDA AND (KOKKOS_CXX_COMPILER_ID STREQUAL Clang)) + SET(CUDA_CONSTEXPR_DEFAULT ON) +ELSE() + SET(CUDA_CONSTEXPR_DEFAULT OFF) +ENDIF() +KOKKOS_ENABLE_OPTION(CUDA_CONSTEXPR ${CUDA_CONSTEXPR_DEFAULT} "Whether to activate experimental relaxed constexpr functions") FUNCTION(check_device_specific_options) CMAKE_PARSE_ARGUMENTS(SOME "" "DEVICE" "OPTIONS" ${ARGN}) @@ -84,9 +95,18 @@ FUNCTION(check_device_specific_options) ENDFUNCTION() CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC) +CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE) CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS HPX_ASYNC_DISPATCH) # Needed due to change from deprecated name to new header define name IF (KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) SET(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ON) ENDIF() + +# This is known to occur with Clang 9. We would need to use nvcc as the linker +# http://lists.llvm.org/pipermail/cfe-dev/2018-June/058296.html +# TODO: Through great effort we can use a different linker by hacking +# CMAKE_CXX_LINK_EXECUTABLE in a future release +IF (KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE AND KOKKOS_CXX_COMPILER_ID STREQUAL Clang) + MESSAGE(FATAL_ERROR "Relocatable device code is currently not supported with Clang - must use nvcc_wrapper or turn off RDC") +ENDIF() diff --git a/cmake/kokkos_functions.cmake b/cmake/kokkos_functions.cmake index 3644c48ddde..fd04966bafa 100644 --- a/cmake/kokkos_functions.cmake +++ b/cmake/kokkos_functions.cmake @@ -3,9 +3,9 @@ # kokkos_option # Validate options are given with correct case and define an internal -# upper-case version for use within +# upper-case version for use within -# +# # # @FUNCTION: kokkos_deprecated_list # @@ -62,7 +62,7 @@ FUNCTION(kokkos_option CAMEL_SUFFIX DEFAULT TYPE DOCSTRING) UNSET(${opt} CACHE) ELSE() MESSAGE(FATAL_ERROR "Matching option found for ${CAMEL_NAME} with the wrong case ${opt}. Please delete your CMakeCache.txt and change option to -D${CAMEL_NAME}=${${opt}}. This is now enforced to avoid hard-to-debug CMake cache inconsistencies.") - ENDIF() + ENDIF() ENDIF() ENDIF() ENDFOREACH() @@ -125,7 +125,7 @@ MACRO(kokkos_export_imported_tpl NAME) KOKKOS_APPEND_CONFIG_LINE("IF(NOT TARGET ${NAME})") KOKKOS_APPEND_CONFIG_LINE("ADD_LIBRARY(${NAME} UNKNOWN IMPORTED)") KOKKOS_APPEND_CONFIG_LINE("SET_TARGET_PROPERTIES(${NAME} PROPERTIES") - + GET_TARGET_PROPERTY(TPL_LIBRARY ${NAME} IMPORTED_LOCATION) IF(TPL_LIBRARY) KOKKOS_APPEND_CONFIG_LINE("IMPORTED_LOCATION ${TPL_LIBRARY}") @@ -198,7 +198,7 @@ MACRO(kokkos_import_tpl NAME) # I have still been getting errors about ROOT variables being ignored # I'm not sure if this is a scope issue - but make sure # the policy is set before we do any find_package calls - IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0") + IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.12.0") CMAKE_POLICY(SET CMP0074 NEW) ENDIF() @@ -341,11 +341,12 @@ ENDMACRO() # default, custom paths are prioritized over system paths. The searched # order is: # 1. _ROOT variable -# 2. Kokkos__DIR variable -# 3. Locations in the PATHS option -# 4. Default system paths, if allowed. +# 2. _ROOT environment variable +# 3. Kokkos__DIR variable +# 4. Locations in the PATHS option +# 5. Default system paths, if allowed. # -# Default system paths are allowed if none of options (1)-(3) are specified +# Default system paths are allowed if none of options (1)-(4) are specified # or if default paths are specifically allowed via ALLOW_SYSTEM_PATH_FALLBACK # # Usage:: @@ -387,33 +388,29 @@ MACRO(kokkos_find_header VAR_NAME HEADER TPL_NAME) "PATHS" ${ARGN}) - SET(${HEADER}_FOUND FALSE) + SET(${VAR_NAME} "${VARNAME}-NOTFOUND") SET(HAVE_CUSTOM_PATHS FALSE) - IF(NOT ${HEADER}_FOUND AND DEFINED ${TPL_NAME}_ROOT) - #ONLY look in the root directory - FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${${TPL_NAME}_ROOT}/include NO_DEFAULT_PATH) - SET(HAVE_CUSTOM_PATHS TRUE) - ENDIF() - IF(NOT ${HEADER}_FOUND AND DEFINED KOKKOS_${TPL_NAME}_DIR) - #ONLY look in the root directory - FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${KOKKOS_${TPL_NAME}_DIR}/include NO_DEFAULT_PATH) + IF(DEFINED ${TPL_NAME}_ROOT OR + DEFINED ENV{${TPL_NAME}_ROOT} OR + DEFINED KOKKOS_${TPL_NAME}_DIR OR + TPL_PATHS) + FIND_PATH(${VAR_NAME} ${HEADER} + PATHS + ${${TPL_NAME}_ROOT} + $ENV{${TPL_NAME}_ROOT} + ${KOKKOS_${TPL_NAME}_DIR} + ${TPL_PATHS} + PATH_SUFFIXES include + NO_DEFAULT_PATH) SET(HAVE_CUSTOM_PATHS TRUE) ENDIF() - IF (NOT ${HEADER}_FOUND AND TPL_PATHS) - #we got custom paths - #ONLY look in these paths and nowhere else - FIND_PATH(${VAR_NAME} ${HEADER} PATHS ${TPL_PATHS} NO_DEFAULT_PATH) - SET(HAVE_CUSTOM_PATHS TRUE) + IF(NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK) + #No-op if ${VAR_NAME} set by previous call + FIND_PATH(${VAR_NAME} ${HEADER}) ENDIF() - IF (NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK) - #Now go ahead and look in system paths - IF (NOT ${HEADER}_FOUND) - FIND_PATH(${VAR_NAME} ${HEADER}) - ENDIF() - ENDIF() ENDMACRO() # @@ -424,9 +421,10 @@ ENDMACRO() # default, custom paths are prioritized over system paths. The search # order is: # 1. _ROOT variable -# 2. Kokkos__DIR variable -# 3. Locations in the PATHS option -# 4. Default system paths, if allowed. +# 2. _ROOT environment variable +# 3. Kokkos__DIR variable +# 4. Locations in the PATHS option +# 5. Default system paths, if allowed. # # Default system paths are allowed if none of options (1)-(3) are specified # or if default paths are specifically allowed via ALLOW_SYSTEM_PATH_FALLBACK @@ -439,6 +437,7 @@ ENDMACRO() # # [ALLOW_SYSTEM_PATH_FALLBACK] # [PATHS path1 [path2 ...]] +# [SUFFIXES suffix1 [suffix2 ...]] # ) # # ```` @@ -463,39 +462,46 @@ ENDMACRO() # # Custom paths to search for the library # +# ``SUFFIXES`` +# +# Suffixes appended to PATHS when attempting to locate +# the library. Defaults to {lib, lib64}. +# MACRO(kokkos_find_library VAR_NAME LIB TPL_NAME) CMAKE_PARSE_ARGUMENTS(TPL "ALLOW_SYSTEM_PATH_FALLBACK" "" - "PATHS" + "PATHS;SUFFIXES" ${ARGN}) - SET(${LIB}_FOUND FALSE) - SET(HAVE_CUSTOM_PATHS FALSE) - IF(NOT ${LIB}_FOUND AND DEFINED ${TPL_NAME}_ROOT) - FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${${TPL_NAME}_ROOT}/lib ${${TPL_NAME}_ROOT}/lib64 NO_DEFAULT_PATH) - SET(HAVE_CUSTOM_PATHS TRUE) + IF(NOT TPL_SUFFIXES) + SET(TPL_SUFFIXES lib lib64) ENDIF() - IF(NOT ${LIB}_FOUND AND DEFINED KOKKOS_${TPL_NAME}_DIR) - #we got root paths, only look in these paths and nowhere else - FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${KOKKOS_${TPL_NAME}_DIR}/lib ${KOKKOS_${TPL_NAME}_DIR}/lib64 NO_DEFAULT_PATH) - SET(HAVE_CUSTOM_PATHS TRUE) - ENDIF() + SET(${VAR_NAME} "${VARNAME}-NOTFOUND") + SET(HAVE_CUSTOM_PATHS FALSE) - IF (NOT ${LIB}_FOUND AND TPL_PATHS) - #we got custom paths, only look in these paths and nowhere else - FIND_LIBRARY(${VAR_NAME} ${LIB} PATHS ${TPL_PATHS} NO_DEFAULT_PATH) + IF(DEFINED ${TPL_NAME}_ROOT OR + DEFINED ENV{${TPL_NAME}_ROOT} OR + DEFINED KOKKOS_${TPL_NAME}_DIR OR + TPL_PATHS) + FIND_LIBRARY(${VAR_NAME} ${LIB} + PATHS + ${${TPL_NAME}_ROOT} + $ENV{${TPL_NAME}_ROOT} + ${KOKKOS_${TPL_NAME}_DIR} + ${TPL_PATHS} + PATH_SUFFIXES + ${TPL_SUFFIXES} + NO_DEFAULT_PATH) SET(HAVE_CUSTOM_PATHS TRUE) ENDIF() - - IF (NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK) - IF (NOT ${LIB}_FOUND) - #Now go ahead and look in system paths - FIND_LIBRARY(${VAR_NAME} ${LIB}) - ENDIF() + IF(NOT HAVE_CUSTOM_PATHS OR TPL_ALLOW_SYSTEM_PATH_FALLBACK) + #No-op if ${VAR_NAME} set by previous call + FIND_LIBRARY(${VAR_NAME} ${LIB} PATH_SUFFIXES ${TPL_SUFFIXES}) ENDIF() + ENDMACRO() # @@ -510,26 +516,28 @@ ENDMACRO() # # INTERFACE # ALLOW_SYSTEM_PATH_FALLBACK -# LIBRARY -# LINK_LIBRARIES ... -# COMPILE_OPTIONS ... -# LINK_OPTIONS ... +# MODULE_NAME +# IMPORTED_NAME +# LIBRARY +# LIBRARIES ... +# LIBRARY_PATHS ... +# LIBRARY_SUFFIXES ... +# HEADER +# HEADERS ... +# HEADER_PATHS ... +# ) # # ``INTERFACE`` # # If specified, this TPL will build an INTERFACE library rather than an # IMPORTED target # -# ``ALLOW_SYSTEM_PATH_FALLBACK" +# ``ALLOW_SYSTEM_PATH_FALLBACK`` # # If custom paths are given and the library is not found # should we be allowed to search default system paths # or error out if not found in given paths. # -# ``LIBRARY `` -# -# If specified, this gives the name of the library to look for -# # ``MODULE_NAME `` # # If specified, the name of the enclosing module passed to @@ -541,29 +549,42 @@ ENDMACRO() # If specified, this gives the name of the target to build. # Defaults to Kokkos:: # +# ``LIBRARY `` +# +# If specified, this gives the name of the library to look for +# +# ``LIBRARIES ...`` +# +# If specified, this gives a list of libraries to find for the package +# # ``LIBRARY_PATHS ...`` # -# If specified, this gives a list of paths to search for the library -# If not given, _ROOT/lib and _ROOT/lib64 will be searched. +# If specified, this gives a list of paths to search for the library. +# If not given, _ROOT will be searched. # -# ``HEADER_PATHS ...`` +# ``LIBRARY_SUFFIXES ...`` # -# If specified, this gives a list of paths to search for the headers -# If not given, _ROOT/include and _ROOT/include will be searched. +# Suffixes appended to LIBRARY_PATHS when attempting to locate +# libraries. If not given, defaults to {lib, lib64}. +# +# ``HEADER `` +# +# If specified, this gives the name of a header to to look for # # ``HEADERS ...`` # # If specified, this gives a list of headers to find for the package # -# ``LIBRARIES ...`` +# ``HEADER_PATHS ...`` # -# If specified, this gives a list of libraries to find for the package +# If specified, this gives a list of paths to search for the headers +# If not given, _ROOT/include and _ROOT/include will be searched. # MACRO(kokkos_find_imported NAME) CMAKE_PARSE_ARGUMENTS(TPL "INTERFACE;ALLOW_SYSTEM_PATH_FALLBACK" - "HEADER;LIBRARY;IMPORTED_NAME;MODULE_NAME" - "HEADER_PATHS;LIBRARY_PATHS;HEADERS;LIBRARIES" + "IMPORTED_NAME;MODULE_NAME;LIBRARY;HEADER" + "LIBRARIES;LIBRARY_PATHS;LIBRARY_SUFFIXES;HEADERS;HEADER_PATHS" ${ARGN}) IF(NOT TPL_MODULE_NAME) @@ -584,6 +605,10 @@ MACRO(kokkos_find_imported NAME) ENDIF() ENDIF() + IF (NOT TPL_LIBRARY_SUFFIXES) + SET(TPL_LIBRARY_SUFFIXES lib lib64) + ENDIF() + SET(${NAME}_INCLUDE_DIRS) IF (TPL_HEADER) KOKKOS_FIND_HEADER(${NAME}_INCLUDE_DIRS ${TPL_HEADER} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_HEADER_PATHS}) @@ -598,16 +623,22 @@ MACRO(kokkos_find_imported NAME) SET(${NAME}_LIBRARY) IF(TPL_LIBRARY) - KOKKOS_FIND_LIBRARY(${NAME}_LIBRARY ${TPL_LIBRARY} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_LIBRARY_PATHS}) + KOKKOS_FIND_LIBRARY(${NAME}_LIBRARY ${TPL_LIBRARY} ${NAME} + ${ALLOW_PATH_FALLBACK_OPT} + PATHS ${TPL_LIBRARY_PATHS} + SUFFIXES ${TPL_LIBRARY_SUFFIXES}) ENDIF() SET(${NAME}_FOUND_LIBRARIES) FOREACH(LIB ${TPL_LIBRARIES}) - KOKKOS_FIND_LIBRARY(${LIB}_LOCATION ${LIB} ${NAME} ${ALLOW_PATH_FALLBACK_OPT} PATHS ${TPL_LIBRARY_PATHS}) + KOKKOS_FIND_LIBRARY(${LIB}_LOCATION ${LIB} ${NAME} + ${ALLOW_PATH_FALLBACK_OPT} + PATHS ${TPL_LIBRARY_PATHS} + SUFFIXES ${TPL_LIBRARY_SUFFIXES}) IF(${LIB}_LOCATION) LIST(APPEND ${NAME}_FOUND_LIBRARIES ${${LIB}_LOCATION}) ELSE() - SET(${NAME}_FOUND_LIBRARIES ${${LIB}_LOCATION}) + SET(${NAME}_FOUND_LIBRARIES ${${LIB}_LOCATION}) BREAK() ENDIF() ENDFOREACH() @@ -629,6 +660,13 @@ MACRO(kokkos_find_imported NAME) MARK_AS_ADVANCED(${NAME}_INCLUDE_DIRS ${NAME}_FOUND_LIBRARIES ${NAME}_LIBRARY) + #this is so much fun on a Cray system + #/usr/include should never be added as a -isystem include + #this freaks out the compiler include search order + IF (KOKKOS_IS_CRAYPE) + LIST(REMOVE_ITEM ${NAME}_INCLUDE_DIRS "/usr/include") + ENDIF() + IF (${TPL_MODULE_NAME}_FOUND) SET(IMPORT_TYPE) IF (TPL_INTERFACE) @@ -698,3 +736,66 @@ FUNCTION(kokkos_link_tpl TARGET) ENDIF() ENDFUNCTION() +FUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER) + SET(COMPILERS NVIDIA PGI XL DEFAULT Cray Intel Clang AppleClang GNU) + CMAKE_PARSE_ARGUMENTS( + PARSE + "LINK_OPTIONS;COMPILE_OPTIONS;COMPILE_DEFINITIONS;LINK_LIBRARIES" + "" + "${COMPILERS}" + ${ARGN}) + IF(PARSE_UNPARSED_ARGUMENTS) + MESSAGE(SEND_ERROR "'${PARSE_UNPARSED_ARGUMENTS}' argument(s) not recognized when providing compiler specific options") + ENDIF() + + SET(COMPILER ${KOKKOS_CXX_COMPILER_ID}) + + SET(COMPILER_SPECIFIC_FLAGS_TMP) + FOREACH(COMP ${COMPILERS}) + IF (COMPILER STREQUAL "${COMP}") + IF (PARSE_${COMPILER}) + IF (NOT "${PARSE_${COMPILER}}" STREQUAL "NO-VALUE-SPECIFIED") + SET(COMPILER_SPECIFIC_FLAGS_TMP ${PARSE_${COMPILER}}) + ENDIF() + ELSEIF(PARSE_DEFAULT) + SET(COMPILER_SPECIFIC_FLAGS_TMP ${PARSE_DEFAULT}) + ENDIF() + ENDIF() + ENDFOREACH() + + IF (PARSE_COMPILE_OPTIONS) + # The funky logic here is for future handling of argument deduplication + # If we naively pass multiple -Xcompiler flags to target_compile_options + # -Xcompiler will get deduplicated and break the build + IF ("-Xcompiler" IN_LIST COMPILER_SPECIFIC_FLAGS_TMP) + LIST(REMOVE_ITEM COMPILER_SPECIFIC_FLAGS_TMP "-Xcompiler") + GLOBAL_APPEND(KOKKOS_XCOMPILER_OPTIONS ${COMPILER_SPECIFIC_FLAGS_TMP}) + ELSE() + GLOBAL_APPEND(KOKKOS_COMPILE_OPTIONS ${COMPILER_SPECIFIC_FLAGS_TMP}) + ENDIF() + ENDIF() + + IF (PARSE_LINK_OPTIONS) + GLOBAL_APPEND(KOKKOS_LINK_OPTIONS ${COMPILER_SPECIFIC_FLAGS_TMP}) + ENDIF() + + IF (PARSE_COMPILE_DEFINITIONS) + GLOBAL_APPEND(KOKKOS_COMPILE_DEFINITIONS ${COMPILER_SPECIFIC_FLAGS_TMP}) + ENDIF() + + IF (PARSE_LINK_LIBRARIES) + GLOBAL_APPEND(KOKKOS_LINK_LIBRARIES ${COMPILER_SPECIFIC_FLAGS_TMP}) + ENDIF() +ENDFUNCTION(COMPILER_SPECIFIC_OPTIONS_HELPER) + +FUNCTION(COMPILER_SPECIFIC_FLAGS) + COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} COMPILE_OPTIONS LINK_OPTIONS) +ENDFUNCTION(COMPILER_SPECIFIC_FLAGS) + +FUNCTION(COMPILER_SPECIFIC_DEFS) + COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} COMPILE_DEFINITIONS) +ENDFUNCTION(COMPILER_SPECIFIC_DEFS) + +FUNCTION(COMPILER_SPECIFIC_LIBS) + COMPILER_SPECIFIC_OPTIONS_HELPER(${ARGN} LINK_LIBRARIES) +ENDFUNCTION(COMPILER_SPECIFIC_LIBS) diff --git a/cmake/kokkos_install.cmake b/cmake/kokkos_install.cmake index 1e4a5a2aad7..6a39590f036 100644 --- a/cmake/kokkos_install.cmake +++ b/cmake/kokkos_install.cmake @@ -31,12 +31,11 @@ IF (NOT KOKKOS_HAS_TRILINOS) ELSE() CONFIGURE_FILE(cmake/KokkosConfigCommon.cmake.in ${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake @ONLY) file(READ ${Kokkos_BINARY_DIR}/KokkosConfigCommon.cmake KOKKOS_CONFIG_COMMON) - file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" ${KOKKOS_CONFIG_COMMON}) + file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" "${KOKKOS_CONFIG_COMMON}") + CONFIGURE_FILE(cmake/KokkosTrilinosConfig.cmake.in ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake @ONLY) + file(READ ${Kokkos_BINARY_DIR}/KokkosTrilinosConfig.cmake KOKKOS_TRILINOS_CONFIG) + file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/KokkosConfig_install.cmake" "${KOKKOS_TRILINOS_CONFIG}") ENDIF() -# build and install pkgconfig file -CONFIGURE_FILE(core/src/kokkos.pc.in kokkos.pc @ONLY) -INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/kokkos.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) - INSTALL(FILES ${CMAKE_CURRENT_BINARY_DIR}/KokkosCore_config.h DESTINATION ${KOKKOS_HEADER_DIR}) diff --git a/cmake/kokkos_test_cxx_std.cmake b/cmake/kokkos_test_cxx_std.cmake index c264517abe1..603b4b3d7ae 100644 --- a/cmake/kokkos_test_cxx_std.cmake +++ b/cmake/kokkos_test_cxx_std.cmake @@ -14,12 +14,12 @@ FUNCTION(kokkos_set_cxx_standard_feature standard) ENDIF() ELSEIF(CMAKE_CXX_EXTENSIONS) IF(KOKKOS_DONT_ALLOW_EXTENSIONS) - MESSAGE(FATAL_ERROR "The chosen configuration does not support CXX extensions flags: ${KOKKOS_DONT_ALLOW_EXTENSIONS}. Must set CMAKE_CXX_EXTENSIONS=OFF to continue") + MESSAGE(FATAL_ERROR "The chosen configuration does not support CXX extensions flags: ${KOKKOS_DONT_ALLOW_EXTENSIONS}. Must set CMAKE_CXX_EXTENSIONS=OFF to continue") ELSE() GLOBAL_SET(KOKKOS_USE_CXX_EXTENSIONS ON) ENDIF() ELSE() - #For trilinos, we need to make sure downstream projects + #For trilinos, we need to make sure downstream projects GLOBAL_SET(KOKKOS_USE_CXX_EXTENSIONS OFF) ENDIF() @@ -29,6 +29,10 @@ FUNCTION(kokkos_set_cxx_standard_feature standard) ELSEIF(NOT KOKKOS_USE_CXX_EXTENSIONS AND ${STANDARD_NAME}) MESSAGE(STATUS "Using ${${STANDARD_NAME}} for C++${standard} standard as feature") GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME}) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") + #MSVC doesn't need a command line flag, that doesn't mean it has no support + MESSAGE(STATUS "Using no flag for C++${standard} standard as feature") + GLOBAL_SET(KOKKOS_CXX_STANDARD_FEATURE ${FEATURE_NAME}) ELSE() #nope, we can't do anything here MESSAGE(WARNING "C++${standard} is not supported as a compiler feature. We will choose custom flags for now, but this behavior has been deprecated. Please open an issue at https://github.com/kokkos/kokkos/issues reporting that ${KOKKOS_CXX_COMPILER_ID} ${KOKKOS_CXX_COMPILER_VERSION} failed for ${KOKKOS_CXX_STANDARD}, preferrably including your CMake command.") @@ -119,6 +123,9 @@ IF (NOT KOKKOS_CXX_STANDARD_FEATURE) ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) INCLUDE(${KOKKOS_SRC_PATH}/cmake/intel.cmake) kokkos_set_intel_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD}) + ELSEIF(KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") + INCLUDE(${KOKKOS_SRC_PATH}/cmake/msvc.cmake) + kokkos_set_msvc_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD}) ELSE() INCLUDE(${KOKKOS_SRC_PATH}/cmake/gnu.cmake) kokkos_set_gnu_flags(${KOKKOS_CXX_STANDARD} ${KOKKOS_CXX_INTERMEDIATE_STANDARD}) @@ -128,9 +135,9 @@ IF (NOT KOKKOS_CXX_STANDARD_FEATURE) IF (DEFINED CXX_STD_FLAGS_ACCEPTED) UNSET(CXX_STD_FLAGS_ACCEPTED CACHE) ENDIF() - CHECK_CXX_COMPILER_FLAG(${KOKKOS_CXX_STANDARD_FLAG} CXX_STD_FLAGS_ACCEPTED) + CHECK_CXX_COMPILER_FLAG("${KOKKOS_CXX_STANDARD_FLAG}" CXX_STD_FLAGS_ACCEPTED) IF (NOT CXX_STD_FLAGS_ACCEPTED) - CHECK_CXX_COMPILER_FLAG(${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG} CXX_INT_STD_FLAGS_ACCEPTED) + CHECK_CXX_COMPILER_FLAG("${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG}" CXX_INT_STD_FLAGS_ACCEPTED) IF (NOT CXX_INT_STD_FLAGS_ACCEPTED) MESSAGE(FATAL_ERROR "${KOKKOS_CXX_COMPILER_ID} did not accept ${KOKKOS_CXX_STANDARD_FLAG} or ${KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG}. You likely need to reduce the level of the C++ standard from ${KOKKOS_CXX_STANDARD}") ENDIF() diff --git a/cmake/kokkos_tpls.cmake b/cmake/kokkos_tpls.cmake index 181a497d52a..76efd428479 100644 --- a/cmake/kokkos_tpls.cmake +++ b/cmake/kokkos_tpls.cmake @@ -15,6 +15,10 @@ KOKKOS_TPL_OPTION(CUDA Off) KOKKOS_TPL_OPTION(LIBRT Off) KOKKOS_TPL_OPTION(LIBDL On) +IF(KOKKOS_ENABLE_PROFILING AND NOT KOKKOS_ENABLE_LIBDL) + MESSAGE(SEND_ERROR "Kokkos_ENABLE_PROFILING requires Kokkos_ENABLE_LIBDL=ON") +ENDIF() + IF(Trilinos_ENABLE_Kokkos AND TPL_ENABLE_HPX) SET(HPX_DEFAULT ON) ELSE() diff --git a/cmake/kokkos_tribits.cmake b/cmake/kokkos_tribits.cmake index d2317d2446f..1c3b704ada8 100644 --- a/cmake/kokkos_tribits.cmake +++ b/cmake/kokkos_tribits.cmake @@ -43,6 +43,8 @@ MACRO(KOKKOS_SUBPACKAGE NAME) SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) SET(${PACKAGE_NAME}_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + #ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME}) + #GLOBAL_SET(${PACKAGE_NAME}_LIBS "") endif() ENDMACRO() @@ -114,57 +116,63 @@ MACRO(KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL LIBRARY_NAME) VERIFY_EMPTY(KOKKOS_ADD_LIBRARY ${PARSE_UNPARSED_ARGUMENTS}) ENDMACRO() -FUNCTION(KOKKOS_ADD_EXECUTABLE EXE_NAME) +FUNCTION(KOKKOS_ADD_EXECUTABLE ROOT_NAME) if (KOKKOS_HAS_TRILINOS) - TRIBITS_ADD_EXECUTABLE(${EXE_NAME} ${ARGN}) + TRIBITS_ADD_EXECUTABLE(${ROOT_NAME} ${ARGN}) else() - CMAKE_PARSE_ARGUMENTS(PARSE + CMAKE_PARSE_ARGUMENTS(PARSE "TESTONLY" "" "SOURCES;TESTONLYLIBS" ${ARGN}) + SET(EXE_NAME ${PACKAGE_NAME}_${ROOT_NAME}) ADD_EXECUTABLE(${EXE_NAME} ${PARSE_SOURCES}) IF (PARSE_TESTONLYLIBS) - TARGET_LINK_LIBRARIES(${EXE_NAME} ${PARSE_TESTONLYLIBS}) + TARGET_LINK_LIBRARIES(${EXE_NAME} PRIVATE ${PARSE_TESTONLYLIBS}) ENDIF() VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE ${PARSE_UNPARSED_ARGUMENTS}) + #All executables must link to all the kokkos targets + #This is just private linkage because exe is final + TARGET_LINK_LIBRARIES(${EXE_NAME} PRIVATE kokkos) endif() ENDFUNCTION() -IF(NOT TARGET check) - ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) -ENDIF() - - FUNCTION(KOKKOS_ADD_EXECUTABLE_AND_TEST ROOT_NAME) IF (KOKKOS_HAS_TRILINOS) TRIBITS_ADD_EXECUTABLE_AND_TEST( - ${ROOT_NAME} - TESTONLYLIBS kokkos_gtest + ${ROOT_NAME} + TESTONLYLIBS kokkos_gtest ${ARGN} NUM_MPI_PROCS 1 COMM serial mpi FAIL_REGULAR_EXPRESSION " FAILED " ) ELSE() - CMAKE_PARSE_ARGUMENTS(PARSE + CMAKE_PARSE_ARGUMENTS(PARSE "" "" "SOURCES;CATEGORIES" ${ARGN}) VERIFY_EMPTY(KOKKOS_ADD_EXECUTABLE_AND_TEST ${PARSE_UNPARSED_ARGUMENTS}) - SET(EXE_NAME ${PACKAGE_NAME}_${ROOT_NAME}) - KOKKOS_ADD_TEST_EXECUTABLE(${EXE_NAME} + KOKKOS_ADD_TEST_EXECUTABLE(${ROOT_NAME} SOURCES ${PARSE_SOURCES} ) - KOKKOS_ADD_TEST(NAME ${ROOT_NAME} - EXE ${EXE_NAME} + KOKKOS_ADD_TEST(NAME ${ROOT_NAME} + EXE ${ROOT_NAME} FAIL_REGULAR_EXPRESSION " FAILED " ) ENDIF() ENDFUNCTION() +FUNCTION(KOKKOS_SET_EXE_PROPERTY ROOT_NAME) + SET(TARGET_NAME ${PACKAGE_NAME}_${ROOT_NAME}) + IF (NOT TARGET ${TARGET_NAME}) + MESSAGE(SEND_ERROR "No target ${TARGET_NAME} exists - cannot set target properties") + ENDIF() + SET_PROPERTY(TARGET ${TARGET_PROPERTY} PROPERTY ${ARGN}) +ENDFUNCTION() + MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_compiler_id.cmake) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_enable_devices.cmake) @@ -178,20 +186,17 @@ MACRO(KOKKOS_SETUP_BUILD_ENVIRONMENT) INCLUDE(${KOKKOS_SRC_PATH}/cmake/kokkos_corner_cases.cmake) ENDMACRO() -MACRO(KOKKOS_ADD_TEST_EXECUTABLE EXE_NAME) - CMAKE_PARSE_ARGUMENTS(PARSE +MACRO(KOKKOS_ADD_TEST_EXECUTABLE ROOT_NAME) + CMAKE_PARSE_ARGUMENTS(PARSE "" "" "SOURCES" ${ARGN}) - KOKKOS_ADD_EXECUTABLE(${EXE_NAME} + KOKKOS_ADD_EXECUTABLE(${ROOT_NAME} SOURCES ${PARSE_SOURCES} ${PARSE_UNPARSED_ARGUMENTS} TESTONLYLIBS kokkos_gtest ) - IF (NOT KOKKOS_HAS_TRILINOS) - ADD_DEPENDENCIES(check ${EXE_NAME}) - ENDIF() ENDMACRO() MACRO(KOKKOS_PACKAGE_POSTPROCESS) @@ -201,7 +206,7 @@ MACRO(KOKKOS_PACKAGE_POSTPROCESS) ENDMACRO() FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) - CMAKE_PARSE_ARGUMENTS(PARSE + CMAKE_PARSE_ARGUMENTS(PARSE "PLAIN_STYLE" "" "" @@ -230,6 +235,15 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) $<$:${KOKKOS_COMPILE_OPTIONS}> ) + TARGET_COMPILE_DEFINITIONS( + ${LIBRARY_NAME} PUBLIC + $<$:${KOKKOS_COMPILE_DEFINITIONS}> + ) + + TARGET_LINK_LIBRARIES( + ${LIBRARY_NAME} PUBLIC ${KOKKOS_LINK_LIBRARIES} + ) + IF (KOKKOS_ENABLE_CUDA) TARGET_COMPILE_OPTIONS( ${LIBRARY_NAME} @@ -240,11 +254,18 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) LIST(APPEND NODEDUP_CUDAFE_OPTIONS -Xcudafe ${OPT}) ENDFOREACH() TARGET_COMPILE_OPTIONS( - ${LIBRARY_NAME} + ${LIBRARY_NAME} PUBLIC $<$:${NODEDUP_CUDAFE_OPTIONS}> ) ENDIF() + IF (KOKKOS_ENABLE_HIP) + TARGET_COMPILE_OPTIONS( + ${LIBRARY_NAME} + PUBLIC $<$:${KOKKOS_AMDGPU_OPTIONS}> + ) + ENDIF() + LIST(LENGTH KOKKOS_XCOMPILER_OPTIONS XOPT_LENGTH) IF (XOPT_LENGTH GREATER 1) MESSAGE(FATAL_ERROR "CMake deduplication does not allow multiple -Xcompiler flags (${KOKKOS_XCOMPILER_OPTIONS}): will require Kokkos to upgrade to minimum 3.12") @@ -253,12 +274,12 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) SET(NODEDUP_XCOMPILER_OPTIONS) FOREACH(OPT ${KOKKOS_XCOMPILER_OPTIONS}) #I have to do this for now because we can't guarantee 3.12 support - #I really should do this with the shell option - LIST(APPEND NODEDUP_XCOMPILER_OPTIONS -Xcompiler) - LIST(APPEND NODEDUP_XCOMPILER_OPTIONS ${OPT}) + #I really should do this with the shell option + LIST(APPEND NODEDUP_XCOMPILER_OPTIONS -Xcompiler) + LIST(APPEND NODEDUP_XCOMPILER_OPTIONS ${OPT}) ENDFOREACH() TARGET_COMPILE_OPTIONS( - ${LIBRARY_NAME} + ${LIBRARY_NAME} PUBLIC $<$:${NODEDUP_XCOMPILER_OPTIONS}> ) ENDIF() @@ -276,7 +297,7 @@ FUNCTION(KOKKOS_SET_LIBRARY_PROPERTIES LIBRARY_NAME) ENDFUNCTION() FUNCTION(KOKKOS_INTERNAL_ADD_LIBRARY LIBRARY_NAME) - CMAKE_PARSE_ARGUMENTS(PARSE + CMAKE_PARSE_ARGUMENTS(PARSE "STATIC;SHARED" "" "HEADERS;SOURCES" @@ -362,7 +383,7 @@ FUNCTION(KOKKOS_LIB_INCLUDE_DIRECTORIES TARGET) #ignore the target, tribits doesn't do anything directly with targets TRIBITS_INCLUDE_DIRECTORIES(${ARGN}) ELSE() #append to a list for later - KOKKOS_LIB_TYPE(${TARGET} INCTYPE) + KOKKOS_LIB_TYPE(${TARGET} INCTYPE) FOREACH(DIR ${ARGN}) TARGET_INCLUDE_DIRECTORIES(${TARGET} ${INCTYPE} $) ENDFOREACH() @@ -390,3 +411,15 @@ MACRO(KOKKOS_ADD_TEST_DIRECTORIES) ENDIF() ENDIF() ENDMACRO() + +MACRO(KOKKOS_ADD_EXAMPLE_DIRECTORIES) + if (KOKKOS_HAS_TRILINOS) + TRIBITS_ADD_EXAMPLE_DIRECTORIES(${ARGN}) + else() + IF(KOKKOS_ENABLE_EXAMPLES) + FOREACH(EXAMPLE_DIR ${ARGN}) + ADD_SUBDIRECTORY(${EXAMPLE_DIR}) + ENDFOREACH() + ENDIF() + endif() +ENDMACRO() diff --git a/cmake/msvc.cmake b/cmake/msvc.cmake new file mode 100644 index 00000000000..85421bdbaaa --- /dev/null +++ b/cmake/msvc.cmake @@ -0,0 +1,11 @@ + +FUNCTION(kokkos_set_msvc_flags full_standard int_standard) + IF (CMAKE_CXX_EXTENSIONS) + SET(KOKKOS_CXX_STANDARD_FLAG "" PARENT_SCOPE) + SET(KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG "" PARENT_SCOPE) + ELSE() + SET(KOKKOS_CXX_STANDARD_FLAG "" PARENT_SCOPE) + SET(KOKKOS_CXX_INTERMEDIATE_STANDARD_FLAG "" PARENT_SCOPE) + ENDIF() +ENDFUNCTION() + diff --git a/cmake/tpls/FindTPLCUSPARSE.cmake b/cmake/tpls/FindTPLCUSPARSE.cmake index b8cee04804c..a59868b73bc 100644 --- a/cmake/tpls/FindTPLCUSPARSE.cmake +++ b/cmake/tpls/FindTPLCUSPARSE.cmake @@ -55,19 +55,9 @@ # Check for CUDA support -IF (NOT TPL_ENABLE_CUDA OR CUDA_VERSION VERSION_LESS "4.1") - MESSAGE(FATAL_ERROR "\nCUSPARSE: did not find acceptable version of CUDA libraries (4.1 or greater)") +IF (NOT TPL_ENABLE_CUDA) + MESSAGE(FATAL_ERROR "\nCUSPARSE requires CUDA") ELSE() - IF(CMAKE_VERSION VERSION_LESS "2.8.8") - # FindCUDA before CMake 2.8.8 does not find cusparse library; therefore, we must - find_library(CUDA_cusparse_LIBRARY - cusparse - HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib - ) - IF(CUDA_cusparse_LIBRARY STREQUAL "CUDA_cusparse_LIBRARY-NOTFOUND") - MESSAGE(FATAL_ERROR "\nCUSPARSE: could not find cuspasre library.") - ENDIF() - ENDIF() GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) diff --git a/config/test_all_sandia b/config/test_all_sandia index d94c38cbc69..193a162a4e6 100755 --- a/config/test_all_sandia +++ b/config/test_all_sandia @@ -76,19 +76,18 @@ CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial" CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial" CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial" -GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" -IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -#CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" +GCC_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" +IBM_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CLANG_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +INTEL_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +#CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" PGI_WARNING_FLAGS="" # Default. Machine specific can override. DEBUG=False ARGS="" CUSTOM_BUILD_LIST="" -QTHREADS_PATH="" DRYRUN=False BUILD_ONLY=False declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=1 @@ -114,9 +113,6 @@ do --kokkos-path*) KOKKOS_PATH="${key#*=}" ;; - --qthreads-path*) - QTHREADS_PATH="${key#*=}" - ;; --build-list*) CUSTOM_BUILD_LIST="${key#*=}" ;; @@ -417,8 +413,8 @@ if [ "$PRINT_HELP" = "True" ]; then echo "--build-list=BUILD,BUILD,BUILD..." echo " Provide a comma-separated list of builds instead of running all builds" echo " Valid items:" - echo " OpenMP, Pthread, Qthreads, Serial, OpenMP_Serial, Pthread_Serial" - echo " Qthreads_Serial, Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" + echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial" + echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" echo "" echo "ARGS: list of expressions matching compilers to test" @@ -483,33 +479,6 @@ for ARG in $ARGS; do done done -# Check if Qthreads build requested. -HAVE_QTHREADS_BUILD="False" -if [ -n "$CUSTOM_BUILD_LIST" ]; then - if [[ "$CUSTOM_BUILD_LIST" = *Qthreads* ]]; then - HAVE_QTHREADS_BUILD="True" - fi -else - for COMPILER_DATA in "${COMPILERS[@]}"; do - ARR=($COMPILER_DATA) - BUILD_LIST=${ARR[2]} - if [[ "$BUILD_LIST" = *Qthreads* ]]; then - HAVE_QTHREADS_BUILD="True" - fi - done -fi - -# Ensure Qthreads path is set if Qthreads build is requested. -if [ "$HAVE_QTHREADS_BUILD" = "True" ]; then - if [ -z "$QTHREADS_PATH" ]; then - echo "Need to supply Qthreads path (--qthreads-path) when testing Qthreads backend." >&2 - exit 1 - else - # Strip trailing slashes from path. - QTHREADS_PATH=$(echo $QTHREADS_PATH | sed 's/\/*$//') - fi -fi - # # Functions. # @@ -627,14 +596,6 @@ single_build_and_test() { local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) fi - if [[ "$build" = *Qthreads* ]]; then - if [[ "$build_type" = hwloc* ]]; then - local extra_args="$extra_args --qthreads-path=${QTHREADS_PATH}_hwloc" - else - local extra_args="$extra_args --qthreads-path=$QTHREADS_PATH" - fi - fi - if [[ "$OPT_FLAG" = "" ]]; then OPT_FLAG="-O3" fi diff --git a/containers/performance_tests/CMakeLists.txt b/containers/performance_tests/CMakeLists.txt index ca76808190d..1011cb8fd17 100644 --- a/containers/performance_tests/CMakeLists.txt +++ b/containers/performance_tests/CMakeLists.txt @@ -5,58 +5,42 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) IF(Kokkos_ENABLE_CUDA) SET(SOURCES - TestMain.cpp + TestMain.cpp TestCuda.cpp ) - KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_Cuda + KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_Cuda SOURCES ${SOURCES} ) - - KOKKOS_ADD_TEST( NAME PerformanceTest_Cuda - EXE PerfTestExec_Cuda - ) ENDIF() IF(Kokkos_ENABLE_PTHREAD) SET(SOURCES - TestMain.cpp + TestMain.cpp TestThreads.cpp ) - KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_Threads + KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_Threads SOURCES ${SOURCES} ) - - KOKKOS_ADD_TEST( NAME PerformanceTest_Threads - EXE PerfTestExec_Threads - ) ENDIF() IF(Kokkos_ENABLE_OPENMP) SET(SOURCES - TestMain.cpp + TestMain.cpp TestOpenMP.cpp ) - KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_OpenMP + KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_OpenMP SOURCES ${SOURCES} ) - - KOKKOS_ADD_TEST( NAME PerformanceTest_OpenMP - EXE PerfTestExec_OpenMP - ) ENDIF() IF(Kokkos_ENABLE_HPX) SET(SOURCES - TestMain.cpp + TestMain.cpp TestHPX.cpp ) - KOKKOS_ADD_TEST_EXECUTABLE( PerfTestExec_HPX + KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_HPX SOURCES ${SOURCES} ) - - KOKKOS_ADD_TEST( NAME PerformanceTest_HPX - EXE PerfTestExec_HPX - ) ENDIF() diff --git a/containers/src/Kokkos_Bitset.hpp b/containers/src/Kokkos_Bitset.hpp index 3596c7653ac..ab75fc1e1d0 100644 --- a/containers/src/Kokkos_Bitset.hpp +++ b/containers/src/Kokkos_Bitset.hpp @@ -103,19 +103,19 @@ class Bitset { } } - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION Bitset(const Bitset&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION Bitset& operator=(const Bitset&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION Bitset(Bitset&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION Bitset& operator=(Bitset&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION ~Bitset() = default; /// number of bits in the set diff --git a/containers/src/Kokkos_DualView.hpp b/containers/src/Kokkos_DualView.hpp index d8a3ebc1ae9..ede7d9a31f0 100644 --- a/containers/src/Kokkos_DualView.hpp +++ b/containers/src/Kokkos_DualView.hpp @@ -238,6 +238,53 @@ class DualView : public ViewTraits { #endif } + /// \brief Constructor that allocates View objects on both host and device. + /// + /// This constructor works like the analogous constructor of View. + /// The first arguments are wrapped up in a ViewCtor class, this allows + /// for a label, without initializing, and all of the other things that can + /// be wrapped up in a Ctor class. + /// The arguments that follow are the dimensions of the + /// View objects. For example, if the View has three dimensions, + /// the first three integer arguments will be nonzero, and you may + /// omit the integer arguments that follow. + template + DualView(const Impl::ViewCtorProp& arg_prop, + typename std::enable_if::has_pointer, + size_t>::type const n0 = + KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) + : d_view(arg_prop, n0, n1, n2, n3, n4, n5, n6, n7), + h_view(create_mirror_view(d_view)) // without UVM, host View mirrors + , + modified_flags(t_modified_flags("DualView::modified_flags")) { +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + modified_host = t_modified_flag(modified_flags, 0); + modified_device = t_modified_flag(modified_flags, 1); +#endif + } + + explicit inline DualView(const ViewAllocateWithoutInitializing& arg_prop, + const size_t arg_N0 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t arg_N1 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t arg_N2 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t arg_N3 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t arg_N4 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t arg_N5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t arg_N6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, + const size_t arg_N7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) + : DualView(Impl::ViewCtorProp( + arg_prop.label, Kokkos::WithoutInitializing), + arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, + arg_N7) {} + //! Copy constructor (shallow copy) template DualView(const DualView& src) @@ -470,23 +517,43 @@ class DualView : public ViewTraits { /// as modified, by calling the modify() method with the /// appropriate template parameter. template - void sync(const typename Impl::enable_if< + void sync(const typename std::enable_if< (std::is_same::value) || (std::is_same::value), int>::type& = 0) { - if (modified_flags.data() == NULL) return; + if (modified_flags.data() == nullptr) return; int dev = get_device_side(); if (dev == 1) { // if Device is the same as DualView's device type if ((modified_flags(0) > 0) && (modified_flags(0) >= modified_flags(1))) { +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + if (d_view.data() == h_view.data()) + Kokkos::Impl::cuda_prefetch_pointer( + Kokkos::Cuda(), d_view.data(), + sizeof(typename t_dev::value_type) * d_view.span(), true); + } +#endif + deep_copy(d_view, h_view); modified_flags(0) = modified_flags(1) = 0; } } if (dev == 0) { // hopefully Device is the same as DualView's host type if ((modified_flags(1) > 0) && (modified_flags(1) >= modified_flags(0))) { +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + if (d_view.data() == h_view.data()) + Kokkos::Impl::cuda_prefetch_pointer( + Kokkos::Cuda(), d_view.data(), + sizeof(typename t_dev::value_type) * d_view.span(), false); + } +#endif + deep_copy(h_view, d_view); modified_flags(0) = modified_flags(1) = 0; } @@ -499,12 +566,12 @@ class DualView : public ViewTraits { } template - void sync(const typename Impl::enable_if< + void sync(const typename std::enable_if< (!std::is_same::value) || (std::is_same::value), int>::type& = 0) { - if (modified_flags.data() == NULL) return; + if (modified_flags.data() == nullptr) return; int dev = get_device_side(); @@ -527,8 +594,18 @@ class DualView : public ViewTraits { typename traits::non_const_data_type>::value) Impl::throw_runtime_exception( "Calling sync_host on a DualView with a const datatype."); - if (modified_flags.data() == NULL) return; + if (modified_flags.data() == nullptr) return; if (modified_flags(1) > modified_flags(0)) { +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + if (d_view.data() == h_view.data()) + Kokkos::Impl::cuda_prefetch_pointer( + Kokkos::Cuda(), d_view.data(), + sizeof(typename t_dev::value_type) * d_view.span(), false); + } +#endif + deep_copy(h_view, d_view); modified_flags(1) = modified_flags(0) = 0; } @@ -539,8 +616,18 @@ class DualView : public ViewTraits { typename traits::non_const_data_type>::value) Impl::throw_runtime_exception( "Calling sync_device on a DualView with a const datatype."); - if (modified_flags.data() == NULL) return; + if (modified_flags.data() == nullptr) return; if (modified_flags(0) > modified_flags(1)) { +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + if (d_view.data() == h_view.data()) + Kokkos::Impl::cuda_prefetch_pointer( + Kokkos::Cuda(), d_view.data(), + sizeof(typename t_dev::value_type) * d_view.span(), true); + } +#endif + deep_copy(d_view, h_view); modified_flags(1) = modified_flags(0) = 0; } @@ -548,7 +635,7 @@ class DualView : public ViewTraits { template bool need_sync() const { - if (modified_flags.data() == NULL) return false; + if (modified_flags.data() == nullptr) return false; int dev = get_device_side(); if (dev == 1) { // if Device is the same as DualView's device type @@ -565,12 +652,12 @@ class DualView : public ViewTraits { } inline bool need_sync_host() const { - if (modified_flags.data() == NULL) return false; + if (modified_flags.data() == nullptr) return false; return modified_flags(0) < modified_flags(1); } inline bool need_sync_device() const { - if (modified_flags.data() == NULL) return false; + if (modified_flags.data() == nullptr) return false; return modified_flags(1) < modified_flags(0); } @@ -581,7 +668,7 @@ class DualView : public ViewTraits { /// data as modified. template void modify() { - if (modified_flags.data() == NULL) return; + if (modified_flags.data() == nullptr) return; int dev = get_device_side(); if (dev == 1) { // if Device is the same as DualView's device type @@ -612,7 +699,7 @@ class DualView : public ViewTraits { } inline void modify_host() { - if (modified_flags.data() != NULL) { + if (modified_flags.data() != nullptr) { modified_flags(0) = (modified_flags(1) > modified_flags(0) ? modified_flags(1) : modified_flags(0)) + @@ -631,7 +718,7 @@ class DualView : public ViewTraits { } inline void modify_device() { - if (modified_flags.data() != NULL) { + if (modified_flags.data() != nullptr) { modified_flags(1) = (modified_flags(1) > modified_flags(0) ? modified_flags(1) : modified_flags(0)) + @@ -650,7 +737,7 @@ class DualView : public ViewTraits { } inline void clear_sync_state() { - if (modified_flags.data() != NULL) + if (modified_flags.data() != nullptr) modified_flags(1) = modified_flags(0) = 0; } @@ -675,7 +762,7 @@ class DualView : public ViewTraits { h_view = create_mirror_view(d_view); /* Reset dirty flags */ - if (modified_flags.data() == NULL) { + if (modified_flags.data() == nullptr) { modified_flags = t_modified_flags("DualView::modified_flags"); } else modified_flags(1) = modified_flags(0) = 0; @@ -693,7 +780,7 @@ class DualView : public ViewTraits { const size_t n5 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n6 = KOKKOS_IMPL_CTOR_DEFAULT_ARG, const size_t n7 = KOKKOS_IMPL_CTOR_DEFAULT_ARG) { - if (modified_flags.data() == NULL) { + if (modified_flags.data() == nullptr) { modified_flags = t_modified_flags("DualView::modified_flags"); } if (modified_flags(1) >= modified_flags(0)) { @@ -866,4 +953,27 @@ void deep_copy( } // namespace Kokkos +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +// +// Non-member resize and realloc +// + +template +void resize(DualView& dv, Args&&... args) noexcept( + noexcept(dv.resize(std::forward(args)...))) { + dv.resize(std::forward(args)...); +} + +template +void realloc(DualView& dv, Args&&... args) noexcept( + noexcept(dv.realloc(std::forward(args)...))) { + dv.realloc(std::forward(args)...); +} + +} // end namespace Kokkos + #endif diff --git a/containers/src/Kokkos_DynRankView.hpp b/containers/src/Kokkos_DynRankView.hpp index 0ceb9d5d397..4ab212d7b9a 100644 --- a/containers/src/Kokkos_DynRankView.hpp +++ b/containers/src/Kokkos_DynRankView.hpp @@ -293,6 +293,7 @@ KOKKOS_INLINE_FUNCTION void dyn_rank_view_verify_operator_bounds( dyn_rank_view_error_operator_bounds<0>(buffer + n, LEN - n, map, args...); Kokkos::Impl::throw_runtime_exception(std::string(buffer)); #else + (void)tracker; Kokkos::abort("DynRankView bounds error"); #endif } @@ -1065,8 +1066,8 @@ class DynRankView : public ViewTraits { //---------------------------------------- // Standard constructor, destructor, and assignment operators... - KOKKOS_INLINE_FUNCTION - ~DynRankView() {} + KOKKOS_DEFAULTED_FUNCTION + ~DynRankView() = default; KOKKOS_INLINE_FUNCTION DynRankView() : m_track(), m_map(), m_rank() {} // Default ctor @@ -1773,7 +1774,7 @@ struct DynRankViewRemap { const Kokkos::Impl::ParallelFor closure( *this, Policy(0, n0)); closure.execute(); - // Kokkos::fence(); // ?? + // ExecSpace().fence(); // ?? } KOKKOS_INLINE_FUNCTION @@ -1806,7 +1807,8 @@ inline void deep_copy( const DynRankView& dst, typename ViewTraits::const_value_type& value, typename std::enable_if::specialize, void>::value>::type* = 0) { + typename ViewTraits::specialize, void>::value>::type* = + nullptr) { static_assert( std::is_same::non_const_value_type, typename ViewTraits::value_type>::value, @@ -1843,7 +1845,7 @@ inline void deep_copy( (std::is_same::value && std::is_same::value && (Kokkos::is_dyn_rank_view::value || - Kokkos::is_dyn_rank_view::value))>::type* = 0) { + Kokkos::is_dyn_rank_view::value))>::type* = nullptr) { static_assert( std::is_same::value, @@ -2009,7 +2011,7 @@ inline typename DynRankView::HostMirror create_mirror( typename std::enable_if< std::is_same::specialize, void>::value && !std::is_same::array_layout, - Kokkos::LayoutStride>::value>::type* = 0) { + Kokkos::LayoutStride>::value>::type* = nullptr) { typedef DynRankView src_type; typedef typename src_type::HostMirror dst_type; @@ -2036,7 +2038,8 @@ template typename Impl::MirrorDRVType::view_type create_mirror( const Space&, const Kokkos::DynRankView& src, typename std::enable_if::specialize, void>::value>::type* = 0) { + typename ViewTraits::specialize, void>::value>::type* = + nullptr) { return typename Impl::MirrorDRVType::view_type( src.label(), Impl::reconstructLayout(src.layout(), src.rank())); } @@ -2050,7 +2053,7 @@ inline typename DynRankView::HostMirror create_mirror_view( typename DynRankView::HostMirror::memory_space>::value && std::is_same::data_type, typename DynRankView::HostMirror::data_type>:: - value)>::type* = 0) { + value)>::type* = nullptr) { return src; } @@ -2072,7 +2075,8 @@ template typename Impl::MirrorDRViewType::view_type create_mirror_view( const Space&, const Kokkos::DynRankView& src, typename std::enable_if< - Impl::MirrorDRViewType::is_same_memspace>::type* = 0) { + Impl::MirrorDRViewType::is_same_memspace>::type* = + nullptr) { return src; } @@ -2094,7 +2098,8 @@ create_mirror_view_and_copy( const Space&, const Kokkos::DynRankView& src, std::string const& name = "", typename std::enable_if< - Impl::MirrorDRViewType::is_same_memspace>::type* = 0) { + Impl::MirrorDRViewType::is_same_memspace>::type* = + nullptr) { (void)name; return src; } @@ -2139,7 +2144,7 @@ inline void resize(DynRankView& v, static_assert(Kokkos::ViewTraits::is_managed, "Can only resize managed views"); - drview_type v_resized(v.label(), n0, n1, n2, n3, n4, n5, n6); + drview_type v_resized(v.label(), n0, n1, n2, n3, n4, n5, n6, n7); Kokkos::Impl::DynRankViewRemap(v_resized, v); @@ -2166,7 +2171,7 @@ inline void realloc(DynRankView& v, const std::string label = v.label(); v = drview_type(); // Deallocate first, if the only view to allocation - v = drview_type(label, n0, n1, n2, n3, n4, n5, n6); + v = drview_type(label, n0, n1, n2, n3, n4, n5, n6, n7); } } // namespace Kokkos diff --git a/containers/src/Kokkos_DynamicView.hpp b/containers/src/Kokkos_DynamicView.hpp index 35a64d164fa..ebbbcc5e8ca 100644 --- a/containers/src/Kokkos_DynamicView.hpp +++ b/containers/src/Kokkos_DynamicView.hpp @@ -70,10 +70,10 @@ struct ChunkArraySpace { using memory_space = typename Kokkos::CudaUVMSpace; }; #endif -#ifdef KOKKOS_ENABLE_ROCM +#ifdef KOKKOS_ENABLE_HIP template <> -struct ChunkArraySpace { - using memory_space = typename Kokkos::Experimental::ROCmHostPinnedSpace; +struct ChunkArraySpace { + using memory_space = typename Kokkos::Experimental::HIPHostPinnedSpace; }; #endif } // end namespace Impl @@ -248,8 +248,8 @@ class DynamicView : public Kokkos::ViewTraits { //---------------------------------------- template - KOKKOS_INLINE_FUNCTION reference_type operator()(const I0& i0, - const Args&... args) const { + KOKKOS_INLINE_FUNCTION reference_type + operator()(const I0& i0, const Args&... /*args*/) const { static_assert(Kokkos::Impl::are_integral::value, "Indices must be integral type"); @@ -265,7 +265,7 @@ class DynamicView : public Kokkos::ViewTraits { // If not bounds checking then we assume a non-zero pointer is valid. #if !defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) - if (0 == *ch) + if (nullptr == *ch) #endif { // Verify that allocation of the requested chunk in in progress. @@ -280,7 +280,7 @@ class DynamicView : public Kokkos::ViewTraits { // Allocation of this chunk is in progress // so wait for allocation to complete. - while (0 == *ch) + while (nullptr == *ch) ; } @@ -325,7 +325,7 @@ class DynamicView : public Kokkos::ViewTraits { --*pc; typename traits::memory_space().deallocate( m_chunks[*pc], sizeof(local_value_type) << m_chunk_shift); - m_chunks[*pc] = 0; + m_chunks[*pc] = nullptr; } } // *m_chunks[m_chunk_max+1] stores the 'extent' requested by resize @@ -366,10 +366,10 @@ class DynamicView : public Kokkos::ViewTraits { // Initialize or destroy array of chunk pointers. // Two entries beyond the max chunks are allocation counters. inline void operator()(unsigned i) const { - if (m_destroy && i < m_chunk_max && 0 != m_chunks[i]) { + if (m_destroy && i < m_chunk_max && nullptr != m_chunks[i]) { typename traits::memory_space().deallocate(m_chunks[i], m_chunk_size); } - m_chunks[i] = 0; + m_chunks[i] = nullptr; } void execute(bool arg_destroy) { @@ -419,7 +419,7 @@ class DynamicView : public Kokkos::ViewTraits { const unsigned min_chunk_size, const unsigned max_extent) : m_track(), - m_chunks(0) + m_chunks(nullptr) // The chunk size is guaranteed to be a power of two , m_chunk_shift(Kokkos::Impl::integral_power_of_two_that_contains( @@ -528,7 +528,7 @@ struct CommonSubview, typedef SrcType src_subview_type; dst_subview_type dst_sub; src_subview_type src_sub; - CommonSubview(const DstType& dst, const SrcType& src, const Arg0& arg0) + CommonSubview(const DstType& dst, const SrcType& src, const Arg0& /*arg0*/) : dst_sub(dst), src_sub(src) {} }; diff --git a/containers/src/Kokkos_ErrorReporter.hpp b/containers/src/Kokkos_ErrorReporter.hpp index 25335771e4c..e07c386b7d9 100644 --- a/containers/src/Kokkos_ErrorReporter.hpp +++ b/containers/src/Kokkos_ErrorReporter.hpp @@ -187,7 +187,7 @@ template void ErrorReporter::resize(const size_t new_size) { m_reports.resize(new_size); m_reporters.resize(new_size); - Kokkos::fence(); + typename DeviceType::execution_space().fence(); } } // namespace Experimental diff --git a/containers/src/Kokkos_OffsetView.hpp b/containers/src/Kokkos_OffsetView.hpp index a1fe793cc54..c3c66f0d7f6 100644 --- a/containers/src/Kokkos_OffsetView.hpp +++ b/containers/src/Kokkos_OffsetView.hpp @@ -362,19 +362,18 @@ class OffsetView : public ViewTraits { //---------------------------------------- private: - enum { - is_layout_left = - std::is_same::value, + static constexpr bool is_layout_left = + std::is_same::value; - is_layout_right = - std::is_same::value, + static constexpr bool is_layout_right = + std::is_same::value; - is_layout_stride = std::is_same::value, + static constexpr bool is_layout_stride = + std::is_same::value; - is_default_map = std::is_same::value && - (is_layout_left || is_layout_right || is_layout_stride) - }; + static constexpr bool is_default_map = + std::is_same::value && + (is_layout_left || is_layout_right || is_layout_stride); template ::accessible> @@ -804,8 +803,8 @@ class OffsetView : public ViewTraits { //---------------------------------------- // Standard destructor, constructors, and assignment operators - KOKKOS_INLINE_FUNCTION - ~OffsetView() {} + KOKKOS_DEFAULTED_FUNCTION + ~OffsetView() = default; KOKKOS_INLINE_FUNCTION OffsetView() : m_track(), m_map() { @@ -1317,7 +1316,7 @@ KOKKOS_INLINE_FUNCTION KOKKOS_INLINE_FUNCTION Kokkos::Impl::ALL_t shift_input(const Kokkos::Impl::ALL_t arg, - const int64_t offset) { + const int64_t /*offset*/) { return arg; } @@ -1347,9 +1346,9 @@ KOKKOS_INLINE_FUNCTION void map_arg_to_new_begin( template KOKKOS_INLINE_FUNCTION void map_arg_to_new_begin( - const size_t i, Kokkos::Array& subviewBegins, - typename std::enable_if::type shiftedArg, const Arg arg, - const A viewBegins, size_t& counter) {} + const size_t /*i*/, Kokkos::Array& /*subviewBegins*/, + typename std::enable_if::type /*shiftedArg*/, + const Arg /*arg*/, const A /*viewBegins*/, size_t& /*counter*/) {} template KOKKOS_INLINE_FUNCTION @@ -1832,7 +1831,8 @@ inline void deep_copy( const OffsetView& dst, typename ViewTraits::const_value_type& value, typename std::enable_if::specialize, void>::value>::type* = 0) { + typename ViewTraits::specialize, void>::value>::type* = + nullptr) { static_assert( std::is_same::non_const_value_type, typename ViewTraits::value_type>::value, @@ -1846,7 +1846,8 @@ template inline void deep_copy( const OffsetView& dst, const OffsetView& value, typename std::enable_if::specialize, void>::value>::type* = 0) { + typename ViewTraits::specialize, void>::value>::type* = + nullptr) { static_assert( std::is_same::value_type, typename ViewTraits::non_const_value_type>::value, @@ -1859,7 +1860,8 @@ template inline void deep_copy( const OffsetView& dst, const View& value, typename std::enable_if::specialize, void>::value>::type* = 0) { + typename ViewTraits::specialize, void>::value>::type* = + nullptr) { static_assert( std::is_same::value_type, typename ViewTraits::non_const_value_type>::value, @@ -1873,7 +1875,8 @@ template inline void deep_copy( const View& dst, const OffsetView& value, typename std::enable_if::specialize, void>::value>::type* = 0) { + typename ViewTraits::specialize, void>::value>::type* = + nullptr) { static_assert( std::is_same::value_type, typename ViewTraits::non_const_value_type>::value, @@ -2011,7 +2014,7 @@ create_mirror_view( std::is_same< typename Kokkos::Experimental::OffsetView::data_type, typename Kokkos::Experimental::OffsetView< - T, P...>::HostMirror::data_type>::value)>::type* = 0) { + T, P...>::HostMirror::data_type>::value)>::type* = nullptr) { return src; } diff --git a/containers/src/Kokkos_ScatterView.hpp b/containers/src/Kokkos_ScatterView.hpp index ecf8bfd78be..eb3bc1f2bcd 100644 --- a/containers/src/Kokkos_ScatterView.hpp +++ b/containers/src/Kokkos_ScatterView.hpp @@ -171,24 +171,41 @@ struct DefaultContribution +struct DefaultDuplication { + enum : int { value = Kokkos::Experimental::ScatterNonDuplicated }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterAtomic }; +}; +template <> +struct DefaultContribution { + enum : int { value = Kokkos::Experimental::ScatterAtomic }; +}; +#endif + /* ScatterValue is the object returned by the access operator() of ScatterAccess, This class inherits from the Sum<> reducer and it wraps join(dest, src) with convenient operator+=, etc. Note the addition of update(ValueType const& rhs) and reset() so that all reducers can have common functions See ReduceDuplicates and ResetDuplicates ) */ -template +template struct ScatterValue; -template -struct ScatterValue +struct ScatterValue - : Sum { + : Sum { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) - : Sum(value_in) {} + : Sum(value_in) {} KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) - : Sum(other.reference()) {} + : Sum(other.reference()) {} KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { this->join(this->reference(), rhs); } @@ -206,13 +223,13 @@ struct ScatterValue -struct ScatterValue +struct ScatterValue - : Sum { + : Sum { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) - : Sum(value_in) {} + : Sum(value_in) {} KOKKOS_FORCEINLINE_FUNCTION void operator+=(ValueType const& rhs) { this->join(this->reference(), rhs); @@ -244,15 +261,15 @@ struct ScatterValue -struct ScatterValue +struct ScatterValue - : Prod { + : Prod { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) - : Prod(value_in) {} + : Prod(value_in) {} KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) - : Prod(other.reference()) {} + : Prod(other.reference()) {} KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) { this->join(this->reference(), rhs); } @@ -271,13 +288,13 @@ struct ScatterValue -struct ScatterValue +struct ScatterValue - : Prod { + : Prod { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) - : Prod(value_in) {} + : Prod(value_in) {} KOKKOS_FORCEINLINE_FUNCTION void operator*=(ValueType const& rhs) { this->join(this->reference(), rhs); @@ -320,15 +337,15 @@ struct ScatterValue -struct ScatterValue +struct ScatterValue - : Min { + : Min { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) - : Min(value_in) {} + : Min(value_in) {} KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) - : Min(other.reference()) {} + : Min(other.reference()) {} KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join(this->reference(), rhs); } @@ -340,13 +357,13 @@ struct ScatterValue -struct ScatterValue +struct ScatterValue - : Min { + : Min { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) - : Min(value_in) {} + : Min(value_in) {} KOKKOS_FORCEINLINE_FUNCTION void atomic_min(ValueType& dest, const ValueType& src) const { @@ -382,15 +399,15 @@ struct ScatterValue -struct ScatterValue +struct ScatterValue - : Max { + : Max { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) - : Max(value_in) {} + : Max(value_in) {} KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ScatterValue&& other) - : Max(other.reference()) {} + : Max(other.reference()) {} KOKKOS_FORCEINLINE_FUNCTION void update(ValueType const& rhs) { this->join(this->reference(), rhs); } @@ -402,13 +419,13 @@ struct ScatterValue -struct ScatterValue +struct ScatterValue - : Max { + : Max { public: KOKKOS_FORCEINLINE_FUNCTION ScatterValue(ValueType& value_in) - : Max(value_in) {} + : Max(value_in) {} KOKKOS_FORCEINLINE_FUNCTION void atomic_max(ValueType& dest, const ValueType& src) const { @@ -558,6 +575,8 @@ struct ReduceDuplicatesBase { Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, &kpID); } +#else + (void)name; #endif typedef RangePolicy policy_type; typedef Kokkos::Impl::ParallelFor closure_type; @@ -584,8 +603,9 @@ struct ReduceDuplicates : Base(src_in, dst_in, stride_in, start_in, n_in, name) {} KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { for (size_t j = Base::start; j < Base::n; ++j) { - ScatterValue sv( - Base::dst[i]); + ScatterValue + sv(Base::dst[i]); sv.update(Base::src[i + Base::stride * j]); } } @@ -607,6 +627,8 @@ struct ResetDuplicatesBase { Kokkos::Profiling::beginParallelFor(std::string("reduce_") + name, 0, &kpID); } +#else + (void)name; #endif typedef RangePolicy policy_type; typedef Kokkos::Impl::ParallelFor closure_type; @@ -630,8 +652,9 @@ struct ResetDuplicates : public ResetDuplicatesBase { ResetDuplicates(ValueType* data_in, size_t size_in, std::string const& name) : Base(data_in, size_in, name) {} KOKKOS_FORCEINLINE_FUNCTION void operator()(size_t i) const { - ScatterValue sv( - Base::data[i]); + ScatterValue + sv(Base::data[i]); sv.reset(); } }; @@ -644,35 +667,40 @@ namespace Kokkos { namespace Experimental { template ::value, + typename Layout = Kokkos::DefaultExecutionSpace::array_layout, + typename DeviceType = Kokkos::DefaultExecutionSpace, + int Op = ScatterSum, + int duplication = Kokkos::Impl::Experimental::DefaultDuplication< + typename DeviceType::execution_space>::value, int contribution = Kokkos::Impl::Experimental::DefaultContribution< - ExecSpace, duplication>::value> + typename DeviceType::execution_space, duplication>::value> class ScatterView; -template class ScatterAccess; // non-duplicated implementation -template -class ScatterView { public: - typedef Kokkos::View original_view_type; + using execution_space = typename DeviceType::execution_space; + using memory_space = typename DeviceType::memory_space; + using device_type = Kokkos::Device; + typedef Kokkos::View original_view_type; typedef typename original_view_type::value_type original_value_type; typedef typename original_view_type::reference_type original_reference_type; - friend class ScatterAccess; - friend class ScatterAccess; + template + friend class ScatterView; - ScatterView() {} + ScatterView() = default; template ScatterView(View const& original_view) @@ -682,13 +710,26 @@ class ScatterView + KOKKOS_FUNCTION ScatterView( + const ScatterView& other_view) + : internal_view(other_view.internal_view) {} + + template + KOKKOS_FUNCTION void operator=( + const ScatterView& other_view) { + internal_view = other_view.internal_view; + } + template KOKKOS_FORCEINLINE_FUNCTION - ScatterAccess access() const { - return ScatterAccess{*this}; + return ScatterAccess(*this); } original_view_type subview() const { return internal_view; } @@ -700,18 +741,17 @@ class ScatterView::value, + memory_space, typename dest_type::memory_space>::value, "ScatterView contribute destination memory space not accessible"); if (dest.data() == internal_view.data()) return; - Kokkos::Impl::Experimental::ReduceDuplicates( + Kokkos::Impl::Experimental::ReduceDuplicates( internal_view.data(), dest.data(), 0, 0, 1, internal_view.label()); } void reset() { - Kokkos::Impl::Experimental::ResetDuplicates( + Kokkos::Impl::Experimental::ResetDuplicates( internal_view.data(), internal_view.size(), internal_view.label()); } template @@ -742,17 +782,17 @@ class ScatterView -class ScatterAccess { public: - typedef ScatterView view_type; typedef typename view_type::original_value_type original_value_type; - typedef Kokkos::Impl::Experimental::ScatterValue + typedef Kokkos::Impl::Experimental::ScatterValue< + original_value_type, Op, DeviceType, override_contribution> value_type; KOKKOS_INLINE_FUNCTION @@ -760,9 +800,8 @@ class ScatterAccess KOKKOS_FORCEINLINE_FUNCTION value_type operator()(Args... args) const { @@ -786,26 +825,47 @@ class ScatterAccess -class ScatterView +class ScatterView { public: - typedef Kokkos::View + using execution_space = typename DeviceType::execution_space; + using memory_space = typename DeviceType::memory_space; + using device_type = Kokkos::Device; + typedef Kokkos::View original_view_type; typedef typename original_view_type::value_type original_value_type; typedef typename original_view_type::reference_type original_reference_type; - friend class ScatterAccess; - friend class ScatterAccess; + template + friend class ScatterView; + typedef typename Kokkos::Impl::Experimental::DuplicatedDataType< DataType, Kokkos::LayoutRight> data_type_info; typedef typename data_type_info::value_type internal_data_type; - typedef Kokkos::View + typedef Kokkos::View internal_view_type; - ScatterView() {} + ScatterView() = default; + + template + KOKKOS_FUNCTION ScatterView( + const ScatterView& other_view) + : unique_token(other_view.unique_token), + internal_view(other_view.internal_view) {} + + template + KOKKOS_FUNCTION void operator=( + const ScatterView& other_view) { + unique_token = other_view.unique_token; + internal_view = other_view.internal_view; + } template ScatterView(View const& original_view) @@ -855,12 +915,12 @@ class ScatterView KOKKOS_FORCEINLINE_FUNCTION - ScatterAccess access() const { - return ScatterAccess{*this}; + override_contribution>(*this); } typename Kokkos::Impl::Experimental::Slice::value, + memory_space, typename dest_type::memory_space>::value, "ScatterView deep_copy destination memory space not accessible"); bool is_equal = (dest.data() == internal_view.data()); size_t start = is_equal ? 1 : 0; - Kokkos::Impl::Experimental::ReduceDuplicates( + Kokkos::Impl::Experimental::ReduceDuplicates( internal_view.data(), dest.data(), internal_view.stride(0), start, internal_view.extent(0), internal_view.label()); } void reset() { - Kokkos::Impl::Experimental::ResetDuplicates( + Kokkos::Impl::Experimental::ResetDuplicates( internal_view.data(), internal_view.size(), internal_view.label()); } template @@ -902,8 +961,8 @@ class ScatterView( + Kokkos::Impl::Experimental::ResetDuplicates( internal_view.data() + view.size(), internal_view.size() - view.size(), internal_view.label()); } @@ -931,33 +990,39 @@ class ScatterView + execution_space, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type; unique_token_type unique_token; internal_view_type internal_view; }; -template -class ScatterView +class ScatterView { public: - typedef Kokkos::View + using execution_space = typename DeviceType::execution_space; + using memory_space = typename DeviceType::memory_space; + using device_type = Kokkos::Device; + typedef Kokkos::View original_view_type; typedef typename original_view_type::value_type original_value_type; typedef typename original_view_type::reference_type original_reference_type; - friend class ScatterAccess; - friend class ScatterAccess; + template + friend class ScatterView; + typedef typename Kokkos::Impl::Experimental::DuplicatedDataType< DataType, Kokkos::LayoutLeft> data_type_info; typedef typename data_type_info::value_type internal_data_type; - typedef Kokkos::View + typedef Kokkos::View internal_view_type; - ScatterView() {} + ScatterView() = default; template ScatterView(View const& original_view) : unique_token() { @@ -1011,14 +1076,29 @@ class ScatterView + KOKKOS_FUNCTION ScatterView( + const ScatterView& other_view) + : unique_token(other_view.unique_token), + internal_view(other_view.internal_view) {} + + template + KOKKOS_FUNCTION void operator=( + const ScatterView& other_view) { + unique_token = other_view.unique_token; + internal_view = other_view.internal_view; + } + template KOKKOS_FORCEINLINE_FUNCTION - ScatterAccess access() const { - return ScatterAccess{*this}; + override_contribution>(*this); } typename Kokkos::Impl::Experimental::Slice::value, + memory_space, typename dest_type::memory_space>::value, "ScatterView deep_copy destination memory space not accessible"); auto extent = internal_view.extent(internal_view_type::rank - 1); bool is_equal = (dest.data() == internal_view.data()); size_t start = is_equal ? 1 : 0; - Kokkos::Impl::Experimental::ReduceDuplicates( + Kokkos::Impl::Experimental::ReduceDuplicates( internal_view.data(), dest.data(), internal_view.stride(internal_view_type::rank - 1), start, extent, internal_view.label()); } void reset() { - Kokkos::Impl::Experimental::ResetDuplicates( + Kokkos::Impl::Experimental::ResetDuplicates( internal_view.data(), internal_view.size(), internal_view.label()); } template @@ -1066,8 +1145,8 @@ class ScatterView( + Kokkos::Impl::Experimental::ResetDuplicates( internal_view.data() + view.size(), internal_view.size() - view.size(), internal_view.label()); } @@ -1103,7 +1182,7 @@ class ScatterView + execution_space, Kokkos::Experimental::UniqueTokenScope::Global> unique_token_type; unique_token_type unique_token; @@ -1120,17 +1199,17 @@ class ScatterView -class ScatterAccess { public: - typedef ScatterView view_type; typedef typename view_type::original_value_type original_value_type; - typedef Kokkos::Impl::Experimental::ScatterValue + typedef Kokkos::Impl::Experimental::ScatterValue< + original_value_type, Op, DeviceType, override_contribution> value_type; KOKKOS_FORCEINLINE_FUNCTION @@ -1184,7 +1263,7 @@ template ScatterView< RT, typename ViewTraits::array_layout, - typename ViewTraits::execution_space, + typename ViewTraits::device_type, Op /* just setting defaults if not specified... things got messy because the view type does not come before the duplication/contribution settings in diff --git a/containers/src/Kokkos_StaticCrsGraph.hpp b/containers/src/Kokkos_StaticCrsGraph.hpp index 26c16c4e003..c11413d6278 100644 --- a/containers/src/Kokkos_StaticCrsGraph.hpp +++ b/containers/src/Kokkos_StaticCrsGraph.hpp @@ -112,7 +112,7 @@ struct StaticCrsGraphBalancerFunctor { } } else { if ((count >= (current_block + 1) * cost_per_workset) || - (iRow + 2 == row_offsets.extent(0))) { + (iRow + 2 == int_type(row_offsets.extent(0)))) { if (end_block > current_block + 1) { int_type num_block = end_block - current_block; row_block_offsets(current_block + 1) = iRow; @@ -358,8 +358,8 @@ class StaticCrsGraph { /** \brief Destroy this view of the array. * If the last view then allocated memory is deallocated. */ - KOKKOS_INLINE_FUNCTION - ~StaticCrsGraph() {} + KOKKOS_DEFAULTED_FUNCTION + ~StaticCrsGraph() = default; /** \brief Return number of rows in the graph */ @@ -396,7 +396,7 @@ class StaticCrsGraph { const data_type count = static_cast(row_map(i + 1) - start); if (count == 0) { - return GraphRowViewConst(NULL, 1, 0); + return GraphRowViewConst(nullptr, 1, 0); } else { return GraphRowViewConst(entries, 1, count, start); } @@ -414,9 +414,10 @@ class StaticCrsGraph { row_map_type, View > partitioner(row_map, block_offsets, fix_cost_per_row, num_blocks); - Kokkos::parallel_for(Kokkos::RangePolicy(0, numRows()), + Kokkos::parallel_for("Kokkos::StaticCrsGraph::create_block_partitioning", + Kokkos::RangePolicy(0, numRows()), partitioner); - Kokkos::fence(); + typename device_type::execution_space().fence(); row_block_offsets = block_offsets; } @@ -522,7 +523,8 @@ DataType maximum_entry(const StaticCrsGraph FunctorType; DataType result = 0; - Kokkos::parallel_reduce(graph.entries.extent(0), FunctorType(graph), result); + Kokkos::parallel_reduce("Kokkos::maximum_entry", graph.entries.extent(0), + FunctorType(graph), result); return result; } diff --git a/containers/src/Kokkos_UnorderedMap.hpp b/containers/src/Kokkos_UnorderedMap.hpp index 40e8b9e962c..6f0434dd04d 100644 --- a/containers/src/Kokkos_UnorderedMap.hpp +++ b/containers/src/Kokkos_UnorderedMap.hpp @@ -100,7 +100,7 @@ class UnorderedMapInsertResult { KOKKOS_FORCEINLINE_FUNCTION bool existing() const { return (m_status & EXISTING); } - /// Did the map fail to insert the key due to insufficent capacity + /// Did the map fail to insert the key due to insufficient capacity KOKKOS_FORCEINLINE_FUNCTION bool failed() const { return m_index == UnorderedMapInvalidIndex; } @@ -201,9 +201,9 @@ class UnorderedMapInsertResult { /// template ::type>, + typename Hasher = pod_hash::type>, typename EqualTo = - pod_equal_to::type> > + pod_equal_to::type> > class UnorderedMap { private: typedef typename ViewTraits::host_mirror_space @@ -215,13 +215,13 @@ class UnorderedMap { // key_types typedef Key declared_key_type; - typedef typename Impl::remove_const::type key_type; - typedef typename Impl::add_const::type const_key_type; + typedef typename std::remove_const::type key_type; + typedef typename std::add_const::type const_key_type; // value_types typedef Value declared_value_type; - typedef typename Impl::remove_const::type value_type; - typedef typename Impl::add_const::type const_value_type; + typedef typename std::remove_const::type value_type; + typedef typename std::add_const::type const_value_type; typedef Device device_type; typedef typename Device::execution_space execution_space; @@ -296,25 +296,13 @@ class UnorderedMap { //! \name Public member functions //@{ - UnorderedMap() - : m_bounded_insert(), - m_hasher(), - m_equal_to(), - m_size(), - m_available_indexes(), - m_hash_lists(), - m_next_index(), - m_keys(), - m_values(), - m_scalars() {} - /// \brief Constructor /// /// \param capacity_hint [in] Initial guess of how many unique keys will be /// inserted into the map \param hash [in] Hasher function for \c Key /// instances. The /// default value usually suffices. - UnorderedMap(size_type capacity_hint, hasher_type hasher = hasher_type(), + UnorderedMap(size_type capacity_hint = 0, hasher_type hasher = hasher_type(), equal_to_type equal_to = equal_to_type()) : m_bounded_insert(true), m_hasher(hasher), @@ -689,7 +677,7 @@ class UnorderedMap { template UnorderedMap( UnorderedMap const &src, - typename Impl::enable_if< + typename std::enable_if< Impl::UnorderedMapCanAssign::value, int>::type = 0) @@ -705,7 +693,7 @@ class UnorderedMap { m_scalars(src.m_scalars) {} template - typename Impl::enable_if< + typename std::enable_if< Impl::UnorderedMapCanAssign::value, declared_map_type &>::type @@ -724,9 +712,9 @@ class UnorderedMap { } template - typename Impl::enable_if< - std::is_same::type, key_type>::value && - std::is_same::type, + typename std::enable_if< + std::is_same::type, key_type>::value && + std::is_same::type, value_type>::value>::type create_copy_view( UnorderedMap const &src) { diff --git a/containers/src/Kokkos_Vector.hpp b/containers/src/Kokkos_Vector.hpp index 8962485abe6..02c3e44fc48 100644 --- a/containers/src/Kokkos_Vector.hpp +++ b/containers/src/Kokkos_Vector.hpp @@ -109,7 +109,7 @@ class vector : public DualView { void resize(size_t n, const Scalar& val) { assign(n, val); } void assign(size_t n, const Scalar& val) { - /* Resize if necessary (behavour of std:vector) */ + /* Resize if necessary (behavior of std:vector) */ if (n > span()) DV::resize(size_t(n * _extra_storage)); _size = n; @@ -118,12 +118,12 @@ class vector : public DualView { if (DV::template need_sync()) { set_functor_host f(DV::h_view, val); - parallel_for(n, f); + parallel_for("Kokkos::vector::assign", n, f); typename DV::t_host::execution_space().fence(); DV::template modify(); } else { set_functor f(DV::d_view, val); - parallel_for(n, f); + parallel_for("Kokkos::vector::assign", n, f); typename DV::t_dev::execution_space().fence(); DV::template modify(); } @@ -234,7 +234,7 @@ class vector : public DualView { const_reference back() const { return DV::h_view(_size - 1); } - /* std::algorithms wich work originally with iterators, here they are + /* std::algorithms which work originally with iterators, here they are * implemented as member functions */ size_t lower_bound(const size_t& start, const size_t& theEnd, diff --git a/containers/src/impl/Kokkos_Bitset_impl.hpp b/containers/src/impl/Kokkos_Bitset_impl.hpp index f5fa4d518a0..6fd4319a822 100644 --- a/containers/src/impl/Kokkos_Bitset_impl.hpp +++ b/containers/src/impl/Kokkos_Bitset_impl.hpp @@ -77,7 +77,8 @@ struct BitsetCount { size_type apply() const { size_type count = 0u; - parallel_reduce(m_bitset.m_blocks.extent(0), *this, count); + parallel_reduce("Kokkos::Impl::BitsetCount::apply", + m_bitset.m_blocks.extent(0), *this, count); return count; } diff --git a/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp b/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp index d644c57c0ac..a5fb9990f6a 100644 --- a/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp +++ b/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp @@ -58,8 +58,8 @@ inline typename StaticCrsGraph& view, - typename Impl::enable_if::is_hostspace>::type* = 0) { + typename std::enable_if::is_hostspace>::type* = 0) { return view; } #else @@ -70,8 +70,8 @@ inline typename StaticCrsGraph& view, - typename Impl::enable_if::is_hostspace>::type* = 0) { + typename std::enable_if::is_hostspace>::type* = 0) { return view; } #endif @@ -128,8 +128,8 @@ inline typename StaticCrsGraph& view, - typename Impl::enable_if::is_hostspace>::type* = 0) + typename std::enable_if::is_hostspace>::type* = 0) #else template @@ -138,8 +138,8 @@ inline typename StaticCrsGraph& view, - typename Impl::enable_if::is_hostspace>::type* = 0) + typename std::enable_if::is_hostspace>::type* = 0) #endif { return create_mirror(view); diff --git a/containers/src/impl/Kokkos_UnorderedMap_impl.hpp b/containers/src/impl/Kokkos_UnorderedMap_impl.hpp index 55e76c424df..813936575cf 100644 --- a/containers/src/impl/Kokkos_UnorderedMap_impl.hpp +++ b/containers/src/impl/Kokkos_UnorderedMap_impl.hpp @@ -71,7 +71,10 @@ struct UnorderedMapRehash { UnorderedMapRehash(map_type const& dst, const_map_type const& src) : m_dst(dst), m_src(src) {} - void apply() const { parallel_for(m_src.capacity(), *this); } + void apply() const { + parallel_for("Kokkos::Impl::UnorderedMapRehash::apply", m_src.capacity(), + *this); + } KOKKOS_INLINE_FUNCTION void operator()(size_type i) const { @@ -91,7 +94,10 @@ struct UnorderedMapErase { UnorderedMapErase(map_type const& map) : m_map(map) {} - void apply() const { parallel_for(m_map.m_hash_lists.extent(0), *this); } + void apply() const { + parallel_for("Kokkos::Impl::UnorderedMapErase::apply", + m_map.m_hash_lists.extent(0), *this); + } KOKKOS_INLINE_FUNCTION void operator()(size_type i) const { @@ -152,7 +158,10 @@ struct UnorderedMapHistogram { m_distance("UnorderedMap Histogram"), m_block_distance("UnorderedMap Histogram") {} - void calculate() { parallel_for(m_map.m_hash_lists.extent(0), *this); } + void calculate() { + parallel_for("Kokkos::Impl::UnorderedMapHistogram::calculate", + m_map.m_hash_lists.extent(0), *this); + } void clear() { Kokkos::deep_copy(m_length, 0); @@ -229,7 +238,10 @@ struct UnorderedMapPrint { UnorderedMapPrint(map_type const& map) : m_map(map) {} - void apply() { parallel_for(m_map.m_hash_lists.extent(0), *this); } + void apply() { + parallel_for("Kokkos::Impl::UnorderedMapPrint::apply", + m_map.m_hash_lists.extent(0), *this); + } KOKKOS_INLINE_FUNCTION void operator()(size_type i) const { @@ -245,21 +257,22 @@ struct UnorderedMapPrint { }; template -struct UnorderedMapCanAssign : public false_ {}; +struct UnorderedMapCanAssign : public std::false_type {}; template -struct UnorderedMapCanAssign : public true_ {}; +struct UnorderedMapCanAssign : public std::true_type {}; template -struct UnorderedMapCanAssign : public true_ {}; +struct UnorderedMapCanAssign + : public std::true_type {}; template struct UnorderedMapCanAssign - : public true_ {}; + : public std::true_type {}; template struct UnorderedMapCanAssign - : public true_ {}; + : public std::true_type {}; } // namespace Impl } // namespace Kokkos diff --git a/containers/unit_tests/CMakeLists.txt b/containers/unit_tests/CMakeLists.txt index a83ab1293c1..448cdf43867 100644 --- a/containers/unit_tests/CMakeLists.txt +++ b/containers/unit_tests/CMakeLists.txt @@ -3,7 +3,7 @@ KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) -foreach(Tag Threads;Serial;OpenMP;HPX;Cuda) +foreach(Tag Threads;Serial;OpenMP;HPX;Cuda;HIP) # Because there is always an exception to the rule if(Tag STREQUAL "Threads") set(DEVICE "PTHREAD") @@ -13,23 +13,31 @@ foreach(Tag Threads;Serial;OpenMP;HPX;Cuda) string(TOLOWER ${Tag} dir) # Add test for that backend if it is enabled if(Kokkos_ENABLE_${DEVICE}) - KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_${Tag} - SOURCES - UnitTestMain.cpp - ${dir}/Test${Tag}_BitSet.cpp - ${dir}/Test${Tag}_DualView.cpp - ${dir}/Test${Tag}_DynamicView.cpp - ${dir}/Test${Tag}_DynRankViewAPI_generic.cpp - ${dir}/Test${Tag}_DynRankViewAPI_rank12345.cpp - ${dir}/Test${Tag}_DynRankViewAPI_rank67.cpp - ${dir}/Test${Tag}_ErrorReporter.cpp - ${dir}/Test${Tag}_OffsetView.cpp - ${dir}/Test${Tag}_ScatterView.cpp - ${dir}/Test${Tag}_StaticCrsGraph.cpp - ${dir}/Test${Tag}_UnorderedMap.cpp - ${dir}/Test${Tag}_Vector.cpp - ${dir}/Test${Tag}_ViewCtorPropEmbeddedDim.cpp + set(UnitTestSources UnitTestMain.cpp) + set(dir ${CMAKE_CURRENT_BINARY_DIR}/${dir}) + file(MAKE_DIRECTORY ${dir}) + foreach(Name + Bitset + DualView + DynamicView + DynViewAPI_generic + DynViewAPI_rank12345 + DynViewAPI_rank67 + ErrorReporter + OffsetView + ScatterView + StaticCrsGraph + UnorderedMap + Vector + ViewCtorPropEmbeddedDim + ) + set(file ${dir}/Test${Tag}_${Name}.cpp) + file(WRITE ${file} + "#include \n" + "#include \n" ) + list(APPEND UnitTestSources ${file}) + endforeach() + KOKKOS_ADD_EXECUTABLE_AND_TEST(UnitTest_${Tag} SOURCES ${UnitTestSources}) endif() endforeach() diff --git a/containers/unit_tests/Makefile b/containers/unit_tests/Makefile index a7e0233f8a5..308b5aa8b5f 100644 --- a/containers/unit_tests/Makefile +++ b/containers/unit_tests/Makefile @@ -9,7 +9,7 @@ vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/serial vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/threads vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/rocm vpath %.cpp ${KOKKOS_PATH}/containers/unit_tests/cuda - +vpath %.cpp ${CURDIR} default: build_all echo "End Build" @@ -31,14 +31,24 @@ KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests TEST_TARGETS = TARGETS = +TESTS = Bitset DualView DynamicView DynViewAPI_generic DynViewAPI_rank12345 DynViewAPI_rank67 ErrorReporter OffsetView ScatterView StaticCrsGraph UnorderedMap Vector ViewCtorPropEmbeddedDim +tmp := $(foreach device, $(KOKKOS_DEVICELIST), \ + tmp2 := $(foreach test, $(TESTS), \ + $(if $(filter Test$(device)_$(test).cpp, $(shell ls Test$(device)_$(test).cpp 2>/dev/null)),,\ + $(shell echo "\#include" > Test$(device)_$(test).cpp); \ + $(shell echo "\#include" >> Test$(device)_$(test).cpp); \ + )\ + ) \ +) + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA = UnitTestMain.o gtest-all.o - OBJ_CUDA += TestCuda_BitSet.o + OBJ_CUDA += TestCuda_Bitset.o OBJ_CUDA += TestCuda_DualView.o OBJ_CUDA += TestCuda_DynamicView.o - OBJ_CUDA += TestCuda_DynRankViewAPI_generic.o - OBJ_CUDA += TestCuda_DynRankViewAPI_rank12345.o - OBJ_CUDA += TestCuda_DynRankViewAPI_rank67.o + OBJ_CUDA += TestCuda_DynViewAPI_generic.o + OBJ_CUDA += TestCuda_DynViewAPI_rank12345.o + OBJ_CUDA += TestCuda_DynViewAPI_rank67.o OBJ_CUDA += TestCuda_ErrorReporter.o OBJ_CUDA += TestCuda_OffsetView.o OBJ_CUDA += TestCuda_ScatterView.o @@ -50,33 +60,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) TEST_TARGETS += test-cuda endif -ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1) - OBJ_ROCM = UnitTestMain.o gtest-all.o - OBJ_ROCM += TestROCm_BitSet.o - OBJ_ROCM += TestROCm_DualView.o - OBJ_ROCM += TestROCm_DynamicView.o - OBJ_ROCM += TestROCm_DynRankViewAPI_generic.o - OBJ_ROCM += TestROCm_DynRankViewAPI_rank12345.o - OBJ_ROCM += TestROCm_DynRankViewAPI_rank67.o - OBJ_ROCM += TestROCm_ErrorReporter.o - OBJ_ROCM += TestROCm_OffsetView.o - OBJ_ROCM += TestROCm_ScatterView.o - OBJ_ROCM += TestROCm_StaticCrsGraph.o - OBJ_ROCM += TestROCm_UnorderedMap.o - OBJ_ROCM += TestROCm_Vector.o - OBJ_ROCM += TestROCm_ViewCtorPropEmbeddedDim.o - TARGETS += KokkosContainers_UnitTest_ROCm - TEST_TARGETS += test-rocm -endif - ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) OBJ_THREADS = UnitTestMain.o gtest-all.o - OBJ_THREADS += TestThreads_BitSet.o + OBJ_THREADS += TestThreads_Bitset.o OBJ_THREADS += TestThreads_DualView.o OBJ_THREADS += TestThreads_DynamicView.o - OBJ_THREADS += TestThreads_DynRankViewAPI_generic.o - OBJ_THREADS += TestThreads_DynRankViewAPI_rank12345.o - OBJ_THREADS += TestThreads_DynRankViewAPI_rank67.o + OBJ_THREADS += TestThreads_DynViewAPI_generic.o + OBJ_THREADS += TestThreads_DynViewAPI_rank12345.o + OBJ_THREADS += TestThreads_DynViewAPI_rank67.o OBJ_THREADS += TestThreads_ErrorReporter.o OBJ_THREADS += TestThreads_OffsetView.o OBJ_THREADS += TestThreads_ScatterView.o @@ -90,12 +81,12 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) OBJ_OPENMP = UnitTestMain.o gtest-all.o - OBJ_OPENMP += TestOpenMP_BitSet.o + OBJ_OPENMP += TestOpenMP_Bitset.o OBJ_OPENMP += TestOpenMP_DualView.o OBJ_OPENMP += TestOpenMP_DynamicView.o - OBJ_OPENMP += TestOpenMP_DynRankViewAPI_generic.o - OBJ_OPENMP += TestOpenMP_DynRankViewAPI_rank12345.o - OBJ_OPENMP += TestOpenMP_DynRankViewAPI_rank67.o + OBJ_OPENMP += TestOpenMP_DynViewAPI_generic.o + OBJ_OPENMP += TestOpenMP_DynViewAPI_rank12345.o + OBJ_OPENMP += TestOpenMP_DynViewAPI_rank67.o OBJ_OPENMP += TestOpenMP_ErrorReporter.o OBJ_OPENMP += TestOpenMP_OffsetView.o OBJ_OPENMP += TestOpenMP_ScatterView.o @@ -109,12 +100,12 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) OBJ_HPX = UnitTestMain.o gtest-all.o - OBJ_HPX += TestHPX_BitSet.o + OBJ_HPX += TestHPX_Bitset.o OBJ_HPX += TestHPX_DualView.o OBJ_HPX += TestHPX_DynamicView.o - OBJ_HPX += TestHPX_DynRankViewAPI_generic.o - OBJ_HPX += TestHPX_DynRankViewAPI_rank12345.o - OBJ_HPX += TestHPX_DynRankViewAPI_rank67.o + OBJ_HPX += TestHPX_DynViewAPI_generic.o + OBJ_HPX += TestHPX_DynViewAPI_rank12345.o + OBJ_HPX += TestHPX_DynViewAPI_rank67.o OBJ_HPX += TestHPX_ErrorReporter.o OBJ_HPX += TestHPX_OffsetView.o OBJ_HPX += TestHPX_ScatterView.o @@ -128,12 +119,12 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) OBJ_SERIAL = UnitTestMain.o gtest-all.o - OBJ_SERIAL += TestSerial_BitSet.o + OBJ_SERIAL += TestSerial_Bitset.o OBJ_SERIAL += TestSerial_DualView.o OBJ_SERIAL += TestSerial_DynamicView.o - OBJ_SERIAL += TestSerial_DynRankViewAPI_generic.o - OBJ_SERIAL += TestSerial_DynRankViewAPI_rank12345.o - OBJ_SERIAL += TestSerial_DynRankViewAPI_rank67.o + OBJ_SERIAL += TestSerial_DynViewAPI_generic.o + OBJ_SERIAL += TestSerial_DynViewAPI_rank12345.o + OBJ_SERIAL += TestSerial_DynViewAPI_rank67.o OBJ_SERIAL += TestSerial_ErrorReporter.o OBJ_SERIAL += TestSerial_OffsetView.o OBJ_SERIAL += TestSerial_ScatterView.o @@ -148,9 +139,6 @@ endif KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Cuda -KokkosContainers_UnitTest_ROCm: $(OBJ_ROCM) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(EXTRA_PATH) $(OBJ_ROCM) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_ROCm - KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Threads @@ -166,9 +154,6 @@ KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) test-cuda: KokkosContainers_UnitTest_Cuda ./KokkosContainers_UnitTest_Cuda -test-rocm: KokkosContainers_UnitTest_ROCm - ./KokkosContainers_UnitTest_ROCm - test-threads: KokkosContainers_UnitTest_Threads ./KokkosContainers_UnitTest_Threads @@ -186,7 +171,7 @@ build_all: $(TARGETS) test: $(TEST_TARGETS) clean: kokkos-clean - rm -f *.o $(TARGETS) + rm -f *.o $(TARGETS) *.cpp # Compilation rules diff --git a/containers/unit_tests/TestBitset.hpp b/containers/unit_tests/TestBitset.hpp index 70528880a42..661a1365cb5 100644 --- a/containers/unit_tests/TestBitset.hpp +++ b/containers/unit_tests/TestBitset.hpp @@ -192,7 +192,7 @@ void test_bitset() { bitset_type bitset(test_sizes[i]); - // std::cout << " Check inital count " << std::endl; + // std::cout << " Check initial count " << std::endl; // nothing should be set { Impl::TestBitsetTest f(bitset); @@ -253,8 +253,10 @@ void test_bitset() { } } +// FIXME_HIP deadlock +#ifndef KOKKOS_ENABLE_HIP TEST(TEST_CATEGORY, bitset) { test_bitset(); } - +#endif } // namespace Test #endif // KOKKOS_TEST_BITSET_HPP diff --git a/containers/unit_tests/cuda/TestCuda_Category.hpp b/containers/unit_tests/TestCuda_Category.hpp similarity index 100% rename from containers/unit_tests/cuda/TestCuda_Category.hpp rename to containers/unit_tests/TestCuda_Category.hpp diff --git a/containers/unit_tests/TestDualView.hpp b/containers/unit_tests/TestDualView.hpp index 572ef48839f..665f2511580 100644 --- a/containers/unit_tests/TestDualView.hpp +++ b/containers/unit_tests/TestDualView.hpp @@ -67,11 +67,17 @@ struct test_dualview_combinations { Scalar result; template - Scalar run_me(unsigned int n, unsigned int m) { + Scalar run_me(unsigned int n, unsigned int m, bool with_init) { if (n < 10) n = 10; if (m < 3) m = 3; - ViewType a("A", n, m); + ViewType a; + + if (with_init) { + a = ViewType("A", n, m); + } else { + a = ViewType(Kokkos::ViewAllocateWithoutInitializing("A"), n, m); + } Kokkos::deep_copy(a.d_view, 1); a.template modify(); @@ -96,9 +102,9 @@ struct test_dualview_combinations { return count - a.d_view.extent(0) * a.d_view.extent(1) - 2 - 4 - 3 * 2; } - test_dualview_combinations(unsigned int size) { + test_dualview_combinations(unsigned int size, bool with_init) { result = run_me >( - size, 3); + size, 3, with_init); } }; @@ -124,18 +130,25 @@ struct test_dual_view_deep_copy { typedef Device execution_space; template - void run_me() { - const unsigned int n = 10; - const unsigned int m = 5; - const unsigned int sum_total = n * m; - - ViewType a("A", n, m); - ViewType b("B", n, m); + void run_me(int n, const int m, const bool use_templ_sync) { + ViewType a, b; + if (n >= 0) { + a = ViewType("A", n, m); + b = ViewType("B", n, m); + } else { + n = 0; + } + const scalar_type sum_total = scalar_type(n * m); Kokkos::deep_copy(a.d_view, 1); - a.template modify(); - a.template sync(); + if (use_templ_sync) { + a.template modify(); + a.template sync(); + } else { + a.modify_device(); + a.sync_host(); + } // Check device view is initialized as expected scalar_type a_d_sum = 0; @@ -159,7 +172,11 @@ struct test_dual_view_deep_copy { // Test deep_copy Kokkos::deep_copy(b, a); - b.template sync(); + if (use_templ_sync) { + b.template sync(); + } else { + b.sync_host(); + } // Perform same checks on b as done on a // Check device view is initialized as expected @@ -183,6 +200,145 @@ struct test_dual_view_deep_copy { } // end run_me test_dual_view_deep_copy() { + run_me >(10, 5, + true); + run_me >(10, 5, + false); + // Test zero length but allocated (a.d_view.data!=nullptr but + // a.d_view.span()==0) + run_me >(0, 5, true); + run_me >(0, 5, + false); + + // Test default constructed view + run_me >(-1, 5, + true); + run_me >(-1, 5, + false); + } +}; + +template +struct test_dualview_resize { + typedef Scalar scalar_type; + typedef Device execution_space; + + template + void run_me() { + const unsigned int n = 10; + const unsigned int m = 5; + const unsigned int factor = 2; + + ViewType a("A", n, m); + Kokkos::deep_copy(a.d_view, 1); + + /* Covers case "Resize on Device" */ + a.modify_device(); + Kokkos::resize(a, factor * n, factor * m); + ASSERT_EQ(a.extent(0), n * factor); + ASSERT_EQ(a.extent(1), m * factor); + + Kokkos::deep_copy(a.d_view, 1); + a.sync_host(); + + // Check device view is initialized as expected + scalar_type a_d_sum = 0; + // Execute on the execution_space associated with t_dev's memory space + typedef typename ViewType::t_dev::memory_space::execution_space + t_dev_exec_space; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, a.d_view.extent(0)), + SumViewEntriesFunctor(a.d_view), + a_d_sum); + + // Check host view is synced as expected + scalar_type a_h_sum = 0; + for (size_t i = 0; i < a.h_view.extent(0); ++i) + for (size_t j = 0; j < a.h_view.extent(1); ++j) { + a_h_sum += a.h_view(i, j); + } + + // Check + ASSERT_EQ(a_h_sum, a_d_sum); + ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1)); + + /* Covers case "Resize on Host" */ + a.modify_host(); + + Kokkos::resize(a, n / factor, m / factor); + ASSERT_EQ(a.extent(0), n / factor); + ASSERT_EQ(a.extent(1), m / factor); + + a.sync_device(); + + // Check device view is initialized as expected + a_d_sum = 0; + // Execute on the execution_space associated with t_dev's memory space + typedef typename ViewType::t_dev::memory_space::execution_space + t_dev_exec_space; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, a.d_view.extent(0)), + SumViewEntriesFunctor(a.d_view), + a_d_sum); + + // Check host view is synced as expected + a_h_sum = 0; + for (size_t i = 0; i < a.h_view.extent(0); ++i) + for (size_t j = 0; j < a.h_view.extent(1); ++j) { + a_h_sum += a.h_view(i, j); + } + + // Check + ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1)); + ASSERT_EQ(a_h_sum, a_d_sum); + + } // end run_me + + test_dualview_resize() { + run_me >(); + } +}; + +template +struct test_dualview_realloc { + typedef Scalar scalar_type; + typedef Device execution_space; + + template + void run_me() { + const unsigned int n = 10; + const unsigned int m = 5; + + ViewType a("A", n, m); + Kokkos::realloc(a, n, m); + + Kokkos::deep_copy(a.d_view, 1); + a.modify_device(); + a.sync_host(); + + // Check device view is initialized as expected + scalar_type a_d_sum = 0; + // Execute on the execution_space associated with t_dev's memory space + typedef typename ViewType::t_dev::memory_space::execution_space + t_dev_exec_space; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, a.d_view.extent(0)), + SumViewEntriesFunctor(a.d_view), + a_d_sum); + + // Check host view is synced as expected + scalar_type a_h_sum = 0; + for (size_t i = 0; i < a.h_view.extent(0); ++i) + for (size_t j = 0; j < a.h_view.extent(1); ++j) { + a_h_sum += a.h_view(i, j); + } + + // Check + ASSERT_EQ(a_h_sum, a.extent(0) * a.extent(1)); + ASSERT_EQ(a_h_sum, a_d_sum); + } // end run_me + + test_dualview_realloc() { run_me >(); } }; @@ -190,8 +346,8 @@ struct test_dual_view_deep_copy { } // namespace Impl template -void test_dualview_combinations(unsigned int size) { - Impl::test_dualview_combinations test(size); +void test_dualview_combinations(unsigned int size, bool with_init) { + Impl::test_dualview_combinations test(size, with_init); ASSERT_EQ(test.result, 0); } @@ -200,8 +356,22 @@ void test_dualview_deep_copy() { Impl::test_dual_view_deep_copy(); } +template +void test_dualview_realloc() { + Impl::test_dualview_realloc(); +} + +template +void test_dualview_resize() { + Impl::test_dualview_resize(); +} + TEST(TEST_CATEGORY, dualview_combination) { - test_dualview_combinations(10); + test_dualview_combinations(10, true); +} + +TEST(TEST_CATEGORY, dualview_combinations_without_init) { + test_dualview_combinations(10, false); } TEST(TEST_CATEGORY, dualview_deep_copy) { @@ -209,6 +379,14 @@ TEST(TEST_CATEGORY, dualview_deep_copy) { test_dualview_deep_copy(); } +TEST(TEST_CATEGORY, dualview_realloc) { + test_dualview_realloc(); +} + +TEST(TEST_CATEGORY, dualview_resize) { + test_dualview_resize(); +} + } // namespace Test -#endif // KOKKOS_TEST_UNORDERED_MAP_HPP +#endif // KOKKOS_TEST_DUALVIEW_HPP diff --git a/containers/unit_tests/TestDynViewAPI.hpp b/containers/unit_tests/TestDynViewAPI.hpp index 3692aa8a125..5c1d0229cba 100644 --- a/containers/unit_tests/TestDynViewAPI.hpp +++ b/containers/unit_tests/TestDynViewAPI.hpp @@ -706,8 +706,6 @@ class TestDynViewAPI { typedef typename View0::host_mirror_space host_view_space; - TestDynViewAPI() {} - static void run_tests() { run_test_resize_realloc(); run_test_mirror(); @@ -1078,12 +1076,12 @@ class TestDynViewAPI { ASSERT_TRUE(Kokkos::is_dyn_rank_view::value); ASSERT_FALSE(Kokkos::is_dyn_rank_view >::value); - ASSERT_TRUE(dx.data() == 0); // Okay with UVM - ASSERT_TRUE(dy.data() == 0); // Okay with UVM - ASSERT_TRUE(dz.data() == 0); // Okay with UVM - ASSERT_TRUE(hx.data() == 0); - ASSERT_TRUE(hy.data() == 0); - ASSERT_TRUE(hz.data() == 0); + ASSERT_TRUE(dx.data() == nullptr); // Okay with UVM + ASSERT_TRUE(dy.data() == nullptr); // Okay with UVM + ASSERT_TRUE(dz.data() == nullptr); // Okay with UVM + ASSERT_TRUE(hx.data() == nullptr); + ASSERT_TRUE(hy.data() == nullptr); + ASSERT_TRUE(hz.data() == nullptr); ASSERT_EQ(dx.extent(0), 0u); // Okay with UVM ASSERT_EQ(dy.extent(0), 0u); // Okay with UVM ASSERT_EQ(dz.extent(0), 0u); // Okay with UVM @@ -1154,11 +1152,11 @@ class TestDynViewAPI { ASSERT_EQ(dx.use_count(), size_t(2)); - ASSERT_FALSE(dx.data() == 0); - ASSERT_FALSE(const_dx.data() == 0); - ASSERT_FALSE(unmanaged_dx.data() == 0); - ASSERT_FALSE(unmanaged_from_ptr_dx.data() == 0); - ASSERT_FALSE(dy.data() == 0); + ASSERT_FALSE(dx.data() == nullptr); + ASSERT_FALSE(const_dx.data() == nullptr); + ASSERT_FALSE(unmanaged_dx.data() == nullptr); + ASSERT_FALSE(unmanaged_from_ptr_dx.data() == nullptr); + ASSERT_FALSE(dy.data() == nullptr); ASSERT_NE(dx, dy); ASSERT_EQ(dx.extent(0), unsigned(N0)); @@ -1318,17 +1316,17 @@ class TestDynViewAPI { ASSERT_NE(dx, dz); dx = dView0(); - ASSERT_TRUE(dx.data() == 0); - ASSERT_FALSE(dy.data() == 0); - ASSERT_FALSE(dz.data() == 0); + ASSERT_TRUE(dx.data() == nullptr); + ASSERT_FALSE(dy.data() == nullptr); + ASSERT_FALSE(dz.data() == nullptr); dy = dView0(); - ASSERT_TRUE(dx.data() == 0); - ASSERT_TRUE(dy.data() == 0); - ASSERT_FALSE(dz.data() == 0); + ASSERT_TRUE(dx.data() == nullptr); + ASSERT_TRUE(dy.data() == nullptr); + ASSERT_FALSE(dz.data() == nullptr); dz = dView0(); - ASSERT_TRUE(dx.data() == 0); - ASSERT_TRUE(dy.data() == 0); - ASSERT_TRUE(dz.data() == 0); + ASSERT_TRUE(dx.data() == nullptr); + ASSERT_TRUE(dy.data() == nullptr); + ASSERT_TRUE(dz.data() == nullptr); // View - DynRankView Interoperability tests // deep_copy from view to dynrankview diff --git a/containers/unit_tests/TestDynViewAPI_generic.hpp b/containers/unit_tests/TestDynViewAPI_generic.hpp index 90ca5df194d..b3e2812b440 100644 --- a/containers/unit_tests/TestDynViewAPI_generic.hpp +++ b/containers/unit_tests/TestDynViewAPI_generic.hpp @@ -44,7 +44,10 @@ #include namespace Test { +// FIXME_HIP attempt to access inaccessible memory space +#ifndef KOKKOS_ENABLE_HIP TEST(TEST_CATEGORY, dyn_rank_view_api_generic) { TestDynViewAPI::run_tests(); } +#endif } // namespace Test diff --git a/containers/unit_tests/TestDynViewAPI_rank12345.hpp b/containers/unit_tests/TestDynViewAPI_rank12345.hpp index 050ebbe35ca..86a2e4e9548 100644 --- a/containers/unit_tests/TestDynViewAPI_rank12345.hpp +++ b/containers/unit_tests/TestDynViewAPI_rank12345.hpp @@ -45,7 +45,10 @@ #include namespace Test { +// FIXME_HIP failing with wrong value +#ifndef KOKKOS_ENABLE_HIP TEST(TEST_CATEGORY, dyn_rank_view_api_operator_rank12345) { TestDynViewAPI::run_operator_test_rank12345(); } +#endif } // namespace Test diff --git a/containers/unit_tests/TestDynamicView.hpp b/containers/unit_tests/TestDynamicView.hpp index 235464ef07e..8eabbcb3718 100644 --- a/containers/unit_tests/TestDynamicView.hpp +++ b/containers/unit_tests/TestDynamicView.hpp @@ -79,7 +79,6 @@ struct TestDynamicView { ASSERT_EQ(da.size(), da_size); #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) Kokkos::parallel_for( Kokkos::RangePolicy(0, da_size), KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); @@ -93,7 +92,6 @@ struct TestDynamicView { result_sum); ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2)); -#endif #endif // add 3x more entries i.e. 4x larger than previous size @@ -103,7 +101,6 @@ struct TestDynamicView { ASSERT_EQ(da.size(), da_resize); #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) Kokkos::parallel_for( Kokkos::RangePolicy(da_size, da_resize), KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); @@ -118,7 +115,6 @@ struct TestDynamicView { ASSERT_EQ(new_result_sum + result_sum, (value_type)(da_resize * (da_resize - 1) / 2)); -#endif #endif } // end scope @@ -135,7 +131,6 @@ struct TestDynamicView { ASSERT_EQ(da.size(), da_size); #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) Kokkos::parallel_for( Kokkos::RangePolicy(0, da_size), KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); @@ -149,7 +144,6 @@ struct TestDynamicView { result_sum); ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2)); -#endif #endif // add 3x more entries i.e. 4x larger than previous size @@ -159,7 +153,6 @@ struct TestDynamicView { ASSERT_EQ(da.size(), da_resize); #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) Kokkos::parallel_for( Kokkos::RangePolicy(da_size, da_resize), KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); @@ -174,7 +167,6 @@ struct TestDynamicView { ASSERT_EQ(new_result_sum + result_sum, (value_type)(da_resize * (da_resize - 1) / 2)); -#endif #endif } // end scope @@ -191,7 +183,6 @@ struct TestDynamicView { ASSERT_EQ(da.size(), da_size); #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) Kokkos::parallel_for( Kokkos::RangePolicy(0, da_size), KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); @@ -205,7 +196,6 @@ struct TestDynamicView { result_sum); ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2)); -#endif #endif // remove the final 3/4 entries i.e. first 1/4 remain @@ -214,7 +204,6 @@ struct TestDynamicView { ASSERT_EQ(da.size(), da_resize); #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) Kokkos::parallel_for( Kokkos::RangePolicy(0, da_resize), KOKKOS_LAMBDA(const int i) { da(i) = Scalar(i); }); @@ -228,7 +217,6 @@ struct TestDynamicView { new_result_sum); ASSERT_EQ(new_result_sum, (value_type)(da_resize * (da_resize - 1) / 2)); -#endif #endif } // end scope } diff --git a/containers/unit_tests/TestErrorReporter.hpp b/containers/unit_tests/TestErrorReporter.hpp index 49b71cdea7a..318132500c5 100644 --- a/containers/unit_tests/TestErrorReporter.hpp +++ b/containers/unit_tests/TestErrorReporter.hpp @@ -50,9 +50,13 @@ #include #include +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + namespace Test { -// Just save the data in the report. Informative text goies in the +// Just save the data in the report. Informative text goes in the // operator<<(..). template struct ThreeValReport { @@ -85,7 +89,7 @@ struct ErrorReporterDriverBase { error_reporter_type; error_reporter_type m_errorReporter; - ErrorReporterDriverBase(int reporter_capacity, int test_size) + ErrorReporterDriverBase(int reporter_capacity, int /*test_size*/) : m_errorReporter(reporter_capacity) {} KOKKOS_INLINE_FUNCTION bool error_condition(const int work_idx) const { @@ -176,7 +180,8 @@ struct ErrorReporterDriver : public ErrorReporterDriverBase { } }; -#if defined(KOKKOS_CLASS_LAMBDA) +#if defined(KOKKOS_CLASS_LAMBDA) && \ + (!defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA)) template struct ErrorReporterDriverUseLambda : public ErrorReporterDriverBase { @@ -225,7 +230,8 @@ struct ErrorReporterDriverNativeOpenMP }; #endif -#if defined(KOKKOS_CLASS_LAMBDA) +#if defined(KOKKOS_CLASS_LAMBDA) && \ + (!defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA)) TEST(TEST_CATEGORY, ErrorReporterViaLambda) { TestErrorReporter>(); } diff --git a/containers/unit_tests/rocm/TestROCm_Category.hpp b/containers/unit_tests/TestHIP_Category.hpp similarity index 93% rename from containers/unit_tests/rocm/TestROCm_Category.hpp rename to containers/unit_tests/TestHIP_Category.hpp index d37cd05db68..c2d60d18148 100644 --- a/containers/unit_tests/rocm/TestROCm_Category.hpp +++ b/containers/unit_tests/TestHIP_Category.hpp @@ -42,10 +42,10 @@ //@HEADER */ -#ifndef KOKKOS_TEST_ROCM_HPP -#define KOKKOS_TEST_ROCM_HPP +#ifndef KOKKOS_TEST_HIP_HPP +#define KOKKOS_TEST_HIP_HPP -#define TEST_CATEGORY rocm -#define TEST_EXECSPACE Kokkos::Experimental::ROCm +#define TEST_CATEGORY hip +#define TEST_EXECSPACE Kokkos::Experimental::HIP #endif diff --git a/containers/unit_tests/hpx/TestHPX_Category.hpp b/containers/unit_tests/TestHPX_Category.hpp similarity index 100% rename from containers/unit_tests/hpx/TestHPX_Category.hpp rename to containers/unit_tests/TestHPX_Category.hpp diff --git a/containers/unit_tests/TestOffsetView.hpp b/containers/unit_tests/TestOffsetView.hpp index 12bcda9524e..5114b8022fa 100644 --- a/containers/unit_tests/TestOffsetView.hpp +++ b/containers/unit_tests/TestOffsetView.hpp @@ -60,7 +60,7 @@ using std::endl; namespace Test { template -void test_offsetview_construction(unsigned int size) { +void test_offsetview_construction() { typedef Kokkos::Experimental::OffsetView offset_view_type; typedef Kokkos::View view_type; @@ -185,15 +185,17 @@ void test_offsetview_construction(unsigned int size) { Kokkos::deep_copy(view3D, 1); - Kokkos::Array begins = {{-10, -20, -30}}; - Kokkos::Experimental::OffsetView offsetView3D(view3D, - begins); - typedef Kokkos::MDRangePolicy, Kokkos::IndexType > range3_type; typedef typename range3_type::point_type point3_type; + typename point3_type::value_type begins0 = -10, begins1 = -20, + begins2 = -30; + Kokkos::Array begins = {{begins0, begins1, begins2}}; + Kokkos::Experimental::OffsetView offsetView3D(view3D, + begins); + range3_type rangePolicy3DZero(point3_type{{0, 0, 0}}, point3_type{{extent0, extent1, extent2}}); @@ -207,9 +209,8 @@ void test_offsetview_construction(unsigned int size) { view3DSum); range3_type rangePolicy3D( - point3_type{{begins[0], begins[1], begins[2]}}, - point3_type{ - {begins[0] + extent0, begins[1] + extent1, begins[2] + extent2}}); + point3_type{{begins0, begins1, begins2}}, + point3_type{{begins0 + extent0, begins1 + extent1, begins2 + extent2}}); int offsetView3DSum = 0; Kokkos::parallel_reduce( @@ -388,7 +389,7 @@ void test_offsetview_unmanaged_construction() { } template -void test_offsetview_subview(unsigned int size) { +void test_offsetview_subview() { { // test subview 1 Kokkos::Experimental::OffsetView sliceMe("offsetToSlice", {-10, 20}); @@ -675,7 +676,7 @@ void test_offsetview_offsets_rank3() { #endif TEST(TEST_CATEGORY, offsetview_construction) { - test_offsetview_construction(10); + test_offsetview_construction(); } TEST(TEST_CATEGORY, offsetview_unmanaged_construction) { @@ -683,7 +684,7 @@ TEST(TEST_CATEGORY, offsetview_unmanaged_construction) { } TEST(TEST_CATEGORY, offsetview_subview) { - test_offsetview_subview(10); + test_offsetview_subview(); } #if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA) diff --git a/containers/unit_tests/openmp/TestOpenMP_Category.hpp b/containers/unit_tests/TestOpenMP_Category.hpp similarity index 100% rename from containers/unit_tests/openmp/TestOpenMP_Category.hpp rename to containers/unit_tests/TestOpenMP_Category.hpp diff --git a/containers/unit_tests/TestScatterView.hpp b/containers/unit_tests/TestScatterView.hpp index 93b69cc769c..915d96d3210 100644 --- a/containers/unit_tests/TestScatterView.hpp +++ b/containers/unit_tests/TestScatterView.hpp @@ -50,21 +50,21 @@ namespace Test { -template struct test_scatter_view_impl_cls; -template -struct test_scatter_view_impl_cls { public: - typedef Kokkos::Experimental::ScatterView scatter_view_type; - typedef Kokkos::View orig_view_type; + typedef Kokkos::View orig_view_type; scatter_view_type scatter_view; int scatterSize; @@ -90,7 +90,8 @@ struct test_scatter_view_impl_cls(0, n); + auto policy = + Kokkos::RangePolicy(0, n); Kokkos::parallel_for(policy, *this, "scatter_view_test: Sum"); } @@ -123,17 +124,17 @@ struct test_scatter_view_impl_cls -struct test_scatter_view_impl_cls { public: - typedef Kokkos::Experimental::ScatterView scatter_view_type; - typedef Kokkos::View orig_view_type; + typedef Kokkos::View orig_view_type; scatter_view_type scatter_view; int scatterSize; @@ -159,7 +160,8 @@ struct test_scatter_view_impl_cls(0, n); + auto policy = + Kokkos::RangePolicy(0, n); Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); } @@ -192,17 +194,17 @@ struct test_scatter_view_impl_cls -struct test_scatter_view_impl_cls { public: - typedef Kokkos::Experimental::ScatterView scatter_view_type; - typedef Kokkos::View orig_view_type; + typedef Kokkos::View orig_view_type; scatter_view_type scatter_view; int scatterSize; @@ -228,7 +230,8 @@ struct test_scatter_view_impl_cls(0, n); + auto policy = + Kokkos::RangePolicy(0, n); Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); } @@ -261,17 +264,17 @@ struct test_scatter_view_impl_cls -struct test_scatter_view_impl_cls { public: - typedef Kokkos::Experimental::ScatterView scatter_view_type; - typedef Kokkos::View orig_view_type; + typedef Kokkos::View orig_view_type; scatter_view_type scatter_view; int scatterSize; @@ -297,7 +300,7 @@ struct test_scatter_view_impl_cls(0, n); + Kokkos::RangePolicy policy(0, n); Kokkos::parallel_for(policy, *this, "scatter_view_test: Prod"); } @@ -330,20 +333,18 @@ struct test_scatter_view_impl_cls struct test_scatter_view_config { public: typedef - typename test_scatter_view_impl_cls::scatter_view_type scatter_view_def; - typedef typename test_scatter_view_impl_cls::orig_view_type orig_view_def; - test_scatter_view_config() {} - void run_test(int n) { // Test creation via create_scatter_view { @@ -351,7 +352,7 @@ struct test_scatter_view_config { scatter_view_def scatter_view = Kokkos::Experimental::create_scatter_view< op, duplication, contribution>(original_view); - test_scatter_view_impl_cls scatter_view_test_impl(scatter_view); scatter_view_test_impl.initialize(original_view); @@ -379,7 +380,7 @@ struct test_scatter_view_config { orig_view_def original_view("original_view", n); scatter_view_def scatter_view(original_view); - test_scatter_view_impl_cls scatter_view_test_impl(scatter_view); scatter_view_test_impl.initialize(original_view); @@ -405,17 +406,18 @@ struct test_scatter_view_config { } }; -template +template struct TestDuplicatedScatterView { TestDuplicatedScatterView(int n) { // ScatterSum test - test_scatter_view_config< - ExecSpace, Kokkos::LayoutRight, Kokkos::Experimental::ScatterDuplicated, - Kokkos::Experimental::ScatterNonAtomic, ScatterType> + test_scatter_view_config test_sv_right_config; test_sv_right_config.run_test(n); test_scatter_view_config< - ExecSpace, Kokkos::LayoutLeft, Kokkos::Experimental::ScatterDuplicated, + DeviceType, Kokkos::LayoutLeft, Kokkos::Experimental::ScatterDuplicated, Kokkos::Experimental::ScatterNonAtomic, ScatterType> test_sv_left_config; test_sv_left_config.run_test(n); @@ -429,6 +431,16 @@ template struct TestDuplicatedScatterView { TestDuplicatedScatterView(int) {} }; +template +struct TestDuplicatedScatterView< + Kokkos::Device, ScatterType> { + TestDuplicatedScatterView(int) {} +}; +template +struct TestDuplicatedScatterView< + Kokkos::Device, ScatterType> { + TestDuplicatedScatterView(int) {} +}; #endif #ifdef KOKKOS_ENABLE_ROCM @@ -440,17 +452,15 @@ struct TestDuplicatedScatterView { }; #endif -template +template void test_scatter_view(int n) { - // all of these configurations should compile okay, but only some of them are - // correct and/or sensible in terms of memory use - Kokkos::Experimental::UniqueToken unique_token{ExecSpace()}; + using execution_space = typename DeviceType::execution_space; // no atomics or duplication is only sensible if the execution space // is running essentially in serial (doesn't have to be Serial though, // we also test OpenMP with one thread: LAMMPS cares about that) - if (unique_token.size() == 1) { - test_scatter_view_config @@ -458,9 +468,9 @@ void test_scatter_view(int n) { test_sv_config.run_test(n); } #ifdef KOKKOS_ENABLE_SERIAL - if (!std::is_same::value) { + if (!std::is_same::value) { #endif - test_scatter_view_config test_sv_config; @@ -473,16 +483,18 @@ void test_scatter_view(int n) { constexpr std::size_t maximum_allowed_total_bytes = 8ull * 1024ull * 1024ull * 1024ull; std::size_t const maximum_allowed_copy_bytes = - maximum_allowed_total_bytes / std::size_t(unique_token.size()); + maximum_allowed_total_bytes / + std::size_t(execution_space().concurrency()); constexpr std::size_t bytes_per_value = sizeof(double) * 3; std::size_t const maximum_allowed_copy_values = maximum_allowed_copy_bytes / bytes_per_value; n = std::min(n, int(maximum_allowed_copy_values)); - TestDuplicatedScatterView duptest(n); + TestDuplicatedScatterView duptest(n); } +// FIXME_HIP ScatterView requires UniqueToken +#ifndef KOKKOS_ENABLE_HIP TEST(TEST_CATEGORY, scatterview) { -#ifndef KOKKOS_ENABLE_ROCM test_scatter_view(10); test_scatter_view(10); test_scatter_view(10); @@ -504,8 +516,38 @@ TEST(TEST_CATEGORY, scatterview) { test_scatter_view(big_n); test_scatter_view(big_n); test_scatter_view(big_n); +} + +TEST(TEST_CATEGORY, scatterview_devicetype) { + using device_type = + Kokkos::Device; + + test_scatter_view(10); + test_scatter_view(10); + test_scatter_view(10); + test_scatter_view(10); + +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + using cuda_device_type = Kokkos::Device; + test_scatter_view(10); + test_scatter_view(10); + test_scatter_view(10); + test_scatter_view(10); + using cudauvm_device_type = + Kokkos::Device; + test_scatter_view( + 10); + test_scatter_view( + 10); + test_scatter_view( + 10); + test_scatter_view( + 10); + } #endif } +#endif } // namespace Test diff --git a/containers/unit_tests/serial/TestSerial_Category.hpp b/containers/unit_tests/TestSerial_Category.hpp similarity index 100% rename from containers/unit_tests/serial/TestSerial_Category.hpp rename to containers/unit_tests/TestSerial_Category.hpp diff --git a/containers/unit_tests/threads/TestThreads_Category.hpp b/containers/unit_tests/TestThreads_Category.hpp similarity index 100% rename from containers/unit_tests/threads/TestThreads_Category.hpp rename to containers/unit_tests/TestThreads_Category.hpp diff --git a/containers/unit_tests/TestUnorderedMap.hpp b/containers/unit_tests/TestUnorderedMap.hpp index 82782d3bf43..3ec3a4e5ec2 100644 --- a/containers/unit_tests/TestUnorderedMap.hpp +++ b/containers/unit_tests/TestUnorderedMap.hpp @@ -174,6 +174,9 @@ struct TestFind { } // namespace Impl +// MSVC reports a syntax error for this test. +// WORKAROUND MSVC +#ifndef _WIN32 template void test_insert(uint32_t num_nodes, uint32_t num_inserts, uint32_t num_duplicates, bool near) { @@ -225,6 +228,7 @@ void test_insert(uint32_t num_nodes, uint32_t num_inserts, EXPECT_EQ(0u, map.size()); } } +#endif template void test_failed_insert(uint32_t num_nodes) { @@ -291,12 +295,17 @@ void test_deep_copy(uint32_t num_nodes) { } } +// FIXME_HIP deadlock +#ifndef KOKKOS_ENABLE_HIP +// WORKAROUND MSVC +#ifndef _WIN32 TEST(TEST_CATEGORY, UnorderedMap_insert) { for (int i = 0; i < 500; ++i) { test_insert(100000, 90000, 100, true); test_insert(100000, 90000, 100, false); } } +#endif TEST(TEST_CATEGORY, UnorderedMap_failed_insert) { for (int i = 0; i < 1000; ++i) test_failed_insert(10000); @@ -305,6 +314,19 @@ TEST(TEST_CATEGORY, UnorderedMap_failed_insert) { TEST(TEST_CATEGORY, UnorderedMap_deep_copy) { for (int i = 0; i < 2; ++i) test_deep_copy(10000); } +#endif + +TEST(TEST_CATEGORY, UnorderedMap_valid_empty) { + using Key = int; + using Value = int; + using Map = Kokkos::UnorderedMap; + + Map m{}; + Map n{}; + n = Map{m.capacity()}; + n.rehash(m.capacity()); + Kokkos::deep_copy(n, m); +} } // namespace Test diff --git a/containers/unit_tests/TestViewCtorPropEmbeddedDim.hpp b/containers/unit_tests/TestViewCtorPropEmbeddedDim.hpp index 6bac2ca9bd0..3f7d4101f7e 100644 --- a/containers/unit_tests/TestViewCtorPropEmbeddedDim.hpp +++ b/containers/unit_tests/TestViewCtorPropEmbeddedDim.hpp @@ -64,7 +64,7 @@ struct TestViewCtorProp_EmbeddedDim { using DynRankViewIntType = typename Kokkos::DynRankView; using DynRankViewDoubleType = typename Kokkos::DynRankView; - // Cuda 7.0 has issues with using a lamda in parallel_for to initialize the + // Cuda 7.0 has issues with using a lambda in parallel_for to initialize the // view - replace with this functor template struct Functor { diff --git a/containers/unit_tests/cuda/TestCuda_DynRankViewAPI_rank67.cpp b/containers/unit_tests/cuda/TestCuda_DynRankViewAPI_rank67.cpp deleted file mode 100644 index 19e248dd933..00000000000 --- a/containers/unit_tests/cuda/TestCuda_DynRankViewAPI_rank67.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/cuda/TestCuda_ScatterView.cpp b/containers/unit_tests/cuda/TestCuda_ScatterView.cpp deleted file mode 100644 index 10b63d037d7..00000000000 --- a/containers/unit_tests/cuda/TestCuda_ScatterView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/cuda/TestCuda_StaticCrsGraph.cpp b/containers/unit_tests/cuda/TestCuda_StaticCrsGraph.cpp deleted file mode 100644 index 9c93da9c503..00000000000 --- a/containers/unit_tests/cuda/TestCuda_StaticCrsGraph.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/cuda/TestCuda_UnorderedMap.cpp b/containers/unit_tests/cuda/TestCuda_UnorderedMap.cpp deleted file mode 100644 index b204e68977f..00000000000 --- a/containers/unit_tests/cuda/TestCuda_UnorderedMap.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/cuda/TestCuda_ViewCtorPropEmbeddedDim.cpp b/containers/unit_tests/cuda/TestCuda_ViewCtorPropEmbeddedDim.cpp deleted file mode 100644 index c865deb0b25..00000000000 --- a/containers/unit_tests/cuda/TestCuda_ViewCtorPropEmbeddedDim.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_generic.cpp b/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_generic.cpp deleted file mode 100644 index 778bd891d68..00000000000 --- a/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_generic.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank12345.cpp b/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank12345.cpp deleted file mode 100644 index 0bb77a266d5..00000000000 --- a/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank12345.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank67.cpp b/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank67.cpp deleted file mode 100644 index 6594cb3213f..00000000000 --- a/containers/unit_tests/hpx/TestHPX_DynRankViewAPI_rank67.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/hpx/TestHPX_DynamicView.cpp b/containers/unit_tests/hpx/TestHPX_DynamicView.cpp deleted file mode 100644 index c1efc778a69..00000000000 --- a/containers/unit_tests/hpx/TestHPX_DynamicView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/hpx/TestHPX_ErrorReporter.cpp b/containers/unit_tests/hpx/TestHPX_ErrorReporter.cpp deleted file mode 100644 index 3f68c6d07f3..00000000000 --- a/containers/unit_tests/hpx/TestHPX_ErrorReporter.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/hpx/TestHPX_OffsetView.cpp b/containers/unit_tests/hpx/TestHPX_OffsetView.cpp deleted file mode 100644 index 17193002282..00000000000 --- a/containers/unit_tests/hpx/TestHPX_OffsetView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/hpx/TestHPX_ScatterView.cpp b/containers/unit_tests/hpx/TestHPX_ScatterView.cpp deleted file mode 100644 index 8f9eb059189..00000000000 --- a/containers/unit_tests/hpx/TestHPX_ScatterView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/hpx/TestHPX_StaticCrsGraph.cpp b/containers/unit_tests/hpx/TestHPX_StaticCrsGraph.cpp deleted file mode 100644 index 4f513efb0f0..00000000000 --- a/containers/unit_tests/hpx/TestHPX_StaticCrsGraph.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/hpx/TestHPX_UnorderedMap.cpp b/containers/unit_tests/hpx/TestHPX_UnorderedMap.cpp deleted file mode 100644 index 517135290a8..00000000000 --- a/containers/unit_tests/hpx/TestHPX_UnorderedMap.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp b/containers/unit_tests/hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp deleted file mode 100644 index de2e96be730..00000000000 --- a/containers/unit_tests/hpx/TestHPX_ViewCtorPropEmbeddedDim.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/openmp/TestOpenMP_DynRankViewAPI_generic.cpp b/containers/unit_tests/openmp/TestOpenMP_DynRankViewAPI_generic.cpp deleted file mode 100644 index 637be64dfa0..00000000000 --- a/containers/unit_tests/openmp/TestOpenMP_DynRankViewAPI_generic.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/openmp/TestOpenMP_DynRankViewAPI_rank12345.cpp b/containers/unit_tests/openmp/TestOpenMP_DynRankViewAPI_rank12345.cpp deleted file mode 100644 index 01b57a1690a..00000000000 --- a/containers/unit_tests/openmp/TestOpenMP_DynRankViewAPI_rank12345.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/openmp/TestOpenMP_DynRankViewAPI_rank67.cpp b/containers/unit_tests/openmp/TestOpenMP_DynRankViewAPI_rank67.cpp deleted file mode 100644 index 7d742eaeed1..00000000000 --- a/containers/unit_tests/openmp/TestOpenMP_DynRankViewAPI_rank67.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/openmp/TestOpenMP_DynamicView.cpp b/containers/unit_tests/openmp/TestOpenMP_DynamicView.cpp deleted file mode 100644 index 75b616f168c..00000000000 --- a/containers/unit_tests/openmp/TestOpenMP_DynamicView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/openmp/TestOpenMP_ErrorReporter.cpp b/containers/unit_tests/openmp/TestOpenMP_ErrorReporter.cpp deleted file mode 100644 index 1f00f185328..00000000000 --- a/containers/unit_tests/openmp/TestOpenMP_ErrorReporter.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/openmp/TestOpenMP_OffsetView.cpp b/containers/unit_tests/openmp/TestOpenMP_OffsetView.cpp deleted file mode 100644 index 98531ff212e..00000000000 --- a/containers/unit_tests/openmp/TestOpenMP_OffsetView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/openmp/TestOpenMP_ScatterView.cpp b/containers/unit_tests/openmp/TestOpenMP_ScatterView.cpp deleted file mode 100644 index c49577f75dd..00000000000 --- a/containers/unit_tests/openmp/TestOpenMP_ScatterView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/openmp/TestOpenMP_StaticCrsGraph.cpp b/containers/unit_tests/openmp/TestOpenMP_StaticCrsGraph.cpp deleted file mode 100644 index d8ab7b6b211..00000000000 --- a/containers/unit_tests/openmp/TestOpenMP_StaticCrsGraph.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/openmp/TestOpenMP_UnorderedMap.cpp b/containers/unit_tests/openmp/TestOpenMP_UnorderedMap.cpp deleted file mode 100644 index c3db0c0d888..00000000000 --- a/containers/unit_tests/openmp/TestOpenMP_UnorderedMap.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/openmp/TestOpenMP_Vector.cpp b/containers/unit_tests/openmp/TestOpenMP_Vector.cpp deleted file mode 100644 index 7ac49f24fb3..00000000000 --- a/containers/unit_tests/openmp/TestOpenMP_Vector.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/openmp/TestOpenMP_ViewCtorPropEmbeddedDim.cpp b/containers/unit_tests/openmp/TestOpenMP_ViewCtorPropEmbeddedDim.cpp deleted file mode 100644 index b9ae5d80ed9..00000000000 --- a/containers/unit_tests/openmp/TestOpenMP_ViewCtorPropEmbeddedDim.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_BitSet.cpp b/containers/unit_tests/rocm/TestROCm_BitSet.cpp deleted file mode 100644 index c72077eb4c7..00000000000 --- a/containers/unit_tests/rocm/TestROCm_BitSet.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_DualView.cpp b/containers/unit_tests/rocm/TestROCm_DualView.cpp deleted file mode 100644 index e9820395ba5..00000000000 --- a/containers/unit_tests/rocm/TestROCm_DualView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_DynRankViewAPI_generic.cpp b/containers/unit_tests/rocm/TestROCm_DynRankViewAPI_generic.cpp deleted file mode 100644 index 7a3dd65f0ef..00000000000 --- a/containers/unit_tests/rocm/TestROCm_DynRankViewAPI_generic.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_DynRankViewAPI_rank12345.cpp b/containers/unit_tests/rocm/TestROCm_DynRankViewAPI_rank12345.cpp deleted file mode 100644 index 3963dd9c9cf..00000000000 --- a/containers/unit_tests/rocm/TestROCm_DynRankViewAPI_rank12345.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_DynRankViewAPI_rank67.cpp b/containers/unit_tests/rocm/TestROCm_DynRankViewAPI_rank67.cpp deleted file mode 100644 index b9a45826228..00000000000 --- a/containers/unit_tests/rocm/TestROCm_DynRankViewAPI_rank67.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_DynamicView.cpp b/containers/unit_tests/rocm/TestROCm_DynamicView.cpp deleted file mode 100644 index 285ed916c43..00000000000 --- a/containers/unit_tests/rocm/TestROCm_DynamicView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_ErrorReporter.cpp b/containers/unit_tests/rocm/TestROCm_ErrorReporter.cpp deleted file mode 100644 index 2af2f79a168..00000000000 --- a/containers/unit_tests/rocm/TestROCm_ErrorReporter.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_ScatterView.cpp b/containers/unit_tests/rocm/TestROCm_ScatterView.cpp deleted file mode 100644 index f7000bc99e2..00000000000 --- a/containers/unit_tests/rocm/TestROCm_ScatterView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_StaticCrsGraph.cpp b/containers/unit_tests/rocm/TestROCm_StaticCrsGraph.cpp deleted file mode 100644 index bb1e04c5364..00000000000 --- a/containers/unit_tests/rocm/TestROCm_StaticCrsGraph.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_UnorderedMap.cpp b/containers/unit_tests/rocm/TestROCm_UnorderedMap.cpp deleted file mode 100644 index 7b8172fabdc..00000000000 --- a/containers/unit_tests/rocm/TestROCm_UnorderedMap.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_Vector.cpp b/containers/unit_tests/rocm/TestROCm_Vector.cpp deleted file mode 100644 index 1759797487c..00000000000 --- a/containers/unit_tests/rocm/TestROCm_Vector.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/rocm/TestROCm_ViewCtorPropEmbeddedDim.cpp b/containers/unit_tests/rocm/TestROCm_ViewCtorPropEmbeddedDim.cpp deleted file mode 100644 index e95680445b5..00000000000 --- a/containers/unit_tests/rocm/TestROCm_ViewCtorPropEmbeddedDim.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/serial/TestSerial_DynRankViewAPI_generic.cpp b/containers/unit_tests/serial/TestSerial_DynRankViewAPI_generic.cpp deleted file mode 100644 index e8f577c6ac3..00000000000 --- a/containers/unit_tests/serial/TestSerial_DynRankViewAPI_generic.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/serial/TestSerial_DynRankViewAPI_rank12345.cpp b/containers/unit_tests/serial/TestSerial_DynRankViewAPI_rank12345.cpp deleted file mode 100644 index 7db8983c1b1..00000000000 --- a/containers/unit_tests/serial/TestSerial_DynRankViewAPI_rank12345.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/serial/TestSerial_DynRankViewAPI_rank67.cpp b/containers/unit_tests/serial/TestSerial_DynRankViewAPI_rank67.cpp deleted file mode 100644 index a3a745efb5d..00000000000 --- a/containers/unit_tests/serial/TestSerial_DynRankViewAPI_rank67.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/serial/TestSerial_DynamicView.cpp b/containers/unit_tests/serial/TestSerial_DynamicView.cpp deleted file mode 100644 index 6624e3aa07f..00000000000 --- a/containers/unit_tests/serial/TestSerial_DynamicView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/serial/TestSerial_ErrorReporter.cpp b/containers/unit_tests/serial/TestSerial_ErrorReporter.cpp deleted file mode 100644 index 280302275fe..00000000000 --- a/containers/unit_tests/serial/TestSerial_ErrorReporter.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/serial/TestSerial_OffsetView.cpp b/containers/unit_tests/serial/TestSerial_OffsetView.cpp deleted file mode 100644 index 5f8caf7c3f0..00000000000 --- a/containers/unit_tests/serial/TestSerial_OffsetView.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/serial/TestSerial_ScatterView.cpp b/containers/unit_tests/serial/TestSerial_ScatterView.cpp deleted file mode 100644 index 3f102e5cbc9..00000000000 --- a/containers/unit_tests/serial/TestSerial_ScatterView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/serial/TestSerial_StaticCrsGraph.cpp b/containers/unit_tests/serial/TestSerial_StaticCrsGraph.cpp deleted file mode 100644 index 64f09e76e58..00000000000 --- a/containers/unit_tests/serial/TestSerial_StaticCrsGraph.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/serial/TestSerial_UnorderedMap.cpp b/containers/unit_tests/serial/TestSerial_UnorderedMap.cpp deleted file mode 100644 index a72be8e2fc2..00000000000 --- a/containers/unit_tests/serial/TestSerial_UnorderedMap.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/serial/TestSerial_Vector.cpp b/containers/unit_tests/serial/TestSerial_Vector.cpp deleted file mode 100644 index 3826dab1d70..00000000000 --- a/containers/unit_tests/serial/TestSerial_Vector.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/serial/TestSerial_ViewCtorPropEmbeddedDim.cpp b/containers/unit_tests/serial/TestSerial_ViewCtorPropEmbeddedDim.cpp deleted file mode 100644 index 1251808bed7..00000000000 --- a/containers/unit_tests/serial/TestSerial_ViewCtorPropEmbeddedDim.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/threads/TestThreads_DynRankViewAPI_generic.cpp b/containers/unit_tests/threads/TestThreads_DynRankViewAPI_generic.cpp deleted file mode 100644 index b015683bb9d..00000000000 --- a/containers/unit_tests/threads/TestThreads_DynRankViewAPI_generic.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/threads/TestThreads_DynRankViewAPI_rank12345.cpp b/containers/unit_tests/threads/TestThreads_DynRankViewAPI_rank12345.cpp deleted file mode 100644 index dea56c408bd..00000000000 --- a/containers/unit_tests/threads/TestThreads_DynRankViewAPI_rank12345.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/threads/TestThreads_DynRankViewAPI_rank67.cpp b/containers/unit_tests/threads/TestThreads_DynRankViewAPI_rank67.cpp deleted file mode 100644 index 17a289b5065..00000000000 --- a/containers/unit_tests/threads/TestThreads_DynRankViewAPI_rank67.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/threads/TestThreads_DynamicView.cpp b/containers/unit_tests/threads/TestThreads_DynamicView.cpp deleted file mode 100644 index 499321dbbea..00000000000 --- a/containers/unit_tests/threads/TestThreads_DynamicView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/threads/TestThreads_ErrorReporter.cpp b/containers/unit_tests/threads/TestThreads_ErrorReporter.cpp deleted file mode 100644 index 513fdc4af62..00000000000 --- a/containers/unit_tests/threads/TestThreads_ErrorReporter.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/threads/TestThreads_OffsetView.cpp b/containers/unit_tests/threads/TestThreads_OffsetView.cpp deleted file mode 100644 index 717967b2ef3..00000000000 --- a/containers/unit_tests/threads/TestThreads_OffsetView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/threads/TestThreads_ScatterView.cpp b/containers/unit_tests/threads/TestThreads_ScatterView.cpp deleted file mode 100644 index fbb37606ea0..00000000000 --- a/containers/unit_tests/threads/TestThreads_ScatterView.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/threads/TestThreads_StaticCrsGraph.cpp b/containers/unit_tests/threads/TestThreads_StaticCrsGraph.cpp deleted file mode 100644 index 29117c4ef65..00000000000 --- a/containers/unit_tests/threads/TestThreads_StaticCrsGraph.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/threads/TestThreads_UnorderedMap.cpp b/containers/unit_tests/threads/TestThreads_UnorderedMap.cpp deleted file mode 100644 index 9a06288de46..00000000000 --- a/containers/unit_tests/threads/TestThreads_UnorderedMap.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/threads/TestThreads_Vector.cpp b/containers/unit_tests/threads/TestThreads_Vector.cpp deleted file mode 100644 index 33e8b26c8ef..00000000000 --- a/containers/unit_tests/threads/TestThreads_Vector.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/containers/unit_tests/threads/TestThreads_ViewCtorPropEmbeddedDim.cpp b/containers/unit_tests/threads/TestThreads_ViewCtorPropEmbeddedDim.cpp deleted file mode 100644 index 567b2241953..00000000000 --- a/containers/unit_tests/threads/TestThreads_ViewCtorPropEmbeddedDim.cpp +++ /dev/null @@ -1,47 +0,0 @@ - -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#include diff --git a/core/cmake/Dependencies.cmake b/core/cmake/Dependencies.cmake index 9ad7660bdf6..cc901a4ede0 100644 --- a/core/cmake/Dependencies.cmake +++ b/core/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREADS DLlib HPX + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC DLlib HPX TEST_OPTIONAL_TPLS CUSPARSE ) diff --git a/core/cmake/KokkosCore_config.h.in b/core/cmake/KokkosCore_config.h.in index f430c2b5f65..095c869a32d 100644 --- a/core/cmake/KokkosCore_config.h.in +++ b/core/cmake/KokkosCore_config.h.in @@ -12,6 +12,7 @@ #endif #cmakedefine KOKKOS_ENABLE_CUDA +#cmakedefine KOKKOS_ENABLE_HIP #cmakedefine KOKKOS_ENABLE_OPENMP #cmakedefine KOKKOS_ENABLE_THREADS #cmakedefine KOKKOS_ENABLE_SERIAL @@ -63,6 +64,10 @@ #cmakedefine KOKKOS_ENABLE_ISA_POWERPCLE #endif +#ifdef KOKKOS_ENABLE_HIP +#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE +#endif + #cmakedefine KOKKOS_ARCH_ARMV80 1 #cmakedefine KOKKOS_ARCH_ARMV81 1 #cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX 1 diff --git a/core/perf_test/CMakeLists.txt b/core/perf_test/CMakeLists.txt index 79567835ee0..ca695e2700d 100644 --- a/core/perf_test/CMakeLists.txt +++ b/core/perf_test/CMakeLists.txt @@ -48,6 +48,14 @@ SET(SOURCES PerfTest_ViewResize_8.cpp ) +IF(Kokkos_ENABLE_HIP) +# FIXME requires TeamPolicy + LIST(REMOVE_ITEM SOURCES + PerfTest_CustomReduction.cpp + PerfTest_ExecSpacePartitioning.cpp + ) +ENDIF() + # Per #374, we always want to build this test, but we only want to run # it as a PERFORMANCE test. That's why we separate building the test # from running the test. @@ -58,11 +66,14 @@ KOKKOS_INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../../algorithms/src") KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) -KOKKOS_ADD_EXECUTABLE_AND_TEST( - PerfTestExec - SOURCES ${SOURCES} - CATEGORIES PERFORMANCE -) +# This test currently times out for MSVC +IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL "MSVC") + KOKKOS_ADD_EXECUTABLE_AND_TEST( + PerfTestExec + SOURCES ${SOURCES} + CATEGORIES PERFORMANCE + ) +ENDIF() KOKKOS_ADD_EXECUTABLE_AND_TEST( PerformanceTest_TaskDag diff --git a/core/perf_test/PerfTestBlasKernels.hpp b/core/perf_test/PerfTestBlasKernels.hpp index a5a376565d8..2717b133bd6 100644 --- a/core/perf_test/PerfTestBlasKernels.hpp +++ b/core/perf_test/PerfTestBlasKernels.hpp @@ -49,64 +49,9 @@ namespace Kokkos { -template -struct Dot; - -template -struct DotSingle; - -template -struct Scale; - -template -struct AXPBY; - -/** \brief Y = alpha * X + beta * Y */ -template -void axpby(const ConstScalarType& alpha, const ConstVectorType& X, - const ConstScalarType& beta, const VectorType& Y) { - typedef AXPBY functor; - - parallel_for(Y.extent(0), functor(alpha, X, beta, Y)); -} - -/** \brief Y *= alpha */ -template -void scale(const ConstScalarType& alpha, const VectorType& Y) { - typedef Scale functor; - - parallel_for(Y.extent(0), functor(alpha, Y)); -} - -template -void dot(const ConstVectorType& X, const ConstVectorType& Y, - const Finalize& finalize) { - typedef Dot functor; - - parallel_reduce(X.extent(0), functor(X, Y), finalize); -} - -template -void dot(const ConstVectorType& X, const Finalize& finalize) { - typedef DotSingle functor; - - parallel_reduce(X.extent(0), functor(X), finalize); -} - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template +template struct Dot { - typedef typename Device::execution_space execution_space; + typedef typename Type::execution_space execution_space; static_assert(static_cast(Type::Rank) == static_cast(1), "Dot static_assert Fail: Rank != 1"); @@ -136,9 +81,9 @@ struct Dot { static void init(value_type& update) { update = 0; } }; -template +template struct DotSingle { - typedef typename Device::execution_space execution_space; + typedef typename Type::execution_space execution_space; static_assert(static_cast(Type::Rank) == static_cast(1), "DotSingle static_assert Fail: Rank != 1"); @@ -169,9 +114,9 @@ struct DotSingle { static void init(value_type& update) { update = 0; } }; -template +template struct Scale { - typedef typename Device::execution_space execution_space; + typedef typename VectorType::execution_space execution_space; static_assert(static_cast(ScalarType::Rank) == static_cast(0), @@ -196,10 +141,9 @@ struct Scale { void operator()(int i) const { Y[i] *= alpha(); } }; -template +template struct AXPBY { - typedef typename Device::execution_space execution_space; + typedef typename VectorType::execution_space execution_space; static_assert(static_cast(ScalarType::Rank) == static_cast(0), @@ -233,4 +177,42 @@ struct AXPBY { } /* namespace Kokkos */ +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +/** \brief Y = alpha * X + beta * Y */ +template +void axpby(const ConstScalarType& alpha, const ConstVectorType& X, + const ConstScalarType& beta, const VectorType& Y) { + typedef AXPBY functor; + + parallel_for(Y.extent(0), functor(alpha, X, beta, Y)); +} + +/** \brief Y *= alpha */ +template +void scale(const ConstScalarType& alpha, const VectorType& Y) { + typedef Scale functor; + + parallel_for(Y.extent(0), functor(alpha, Y)); +} + +template +void dot(const ConstVectorType& X, const ConstVectorType& Y, + const Finalize& finalize) { + typedef Dot functor; + + parallel_reduce(X.extent(0), functor(X, Y), finalize); +} + +template +void dot(const ConstVectorType& X, const Finalize& finalize) { + typedef DotSingle functor; + + parallel_reduce(X.extent(0), functor(X), finalize); +} + +} /* namespace Kokkos */ + #endif /* #ifndef KOKKOS_BLAS_KERNELS_HPP */ diff --git a/core/perf_test/PerfTestGramSchmidt.cpp b/core/perf_test/PerfTestGramSchmidt.cpp index 5e85163acfc..0916b230ecb 100644 --- a/core/perf_test/PerfTestGramSchmidt.cpp +++ b/core/perf_test/PerfTestGramSchmidt.cpp @@ -69,7 +69,10 @@ struct InvNorm2 : public Kokkos::DotSingle { KOKKOS_INLINE_FUNCTION void final(value_type& result) const { - result = std::sqrt(result); +#ifndef KOKKOS_ENABLE_HIP // FIXME_HIP + using std::sqrt; +#endif + result = sqrt(result); Rjj() = result; inv() = (0 < result) ? 1.0 / result : 0; } diff --git a/core/perf_test/PerfTestMain.cpp b/core/perf_test/PerfTestMain.cpp index 8cd015fbe85..8f4d48d57bf 100644 --- a/core/perf_test/PerfTestMain.cpp +++ b/core/perf_test/PerfTestMain.cpp @@ -54,13 +54,13 @@ int command_line_num_args(int n = 0) { return n_args; } -const char* command_line_arg(int k, char** input_args = NULL) { +const char* command_line_arg(int k, char** input_args = nullptr) { static char** args; - if (input_args != NULL) args = input_args; + if (input_args != nullptr) args = input_args; if (command_line_num_args() > k) return args[k]; else - return NULL; + return nullptr; } } // namespace Test diff --git a/core/perf_test/PerfTest_Category.hpp b/core/perf_test/PerfTest_Category.hpp index c2cff225020..0f24490bfeb 100644 --- a/core/perf_test/PerfTest_Category.hpp +++ b/core/perf_test/PerfTest_Category.hpp @@ -50,7 +50,7 @@ namespace Test { extern int command_line_num_args(int n = 0); -extern const char* command_line_arg(int k, char** input_args = NULL); +extern const char* command_line_arg(int k, char** input_args = nullptr); } // namespace Test diff --git a/core/perf_test/PerfTest_CustomReduction.cpp b/core/perf_test/PerfTest_CustomReduction.cpp index d06851ec9b3..75ca4a0d5a4 100644 --- a/core/perf_test/PerfTest_CustomReduction.cpp +++ b/core/perf_test/PerfTest_CustomReduction.cpp @@ -76,8 +76,8 @@ void custom_reduction_test(int N, int R, int num_trials) { Kokkos::ThreadVectorRange(team, 32), [&](const int& k, Scalar& max_) { const Scalar val = a((i * 32 + j) * 32 + k); - if (val > lmax) lmax = val; - if ((k == 11) && (j == 17) && (i == 2)) lmax = 11.5; + if (val > max_) max_ = val; + if ((k == 11) && (j == 17) && (i == 2)) max_ = 11.5; }, Kokkos::Max(t_max)); if (t_max > thread_max) thread_max = t_max; @@ -106,8 +106,8 @@ void custom_reduction_test(int N, int R, int num_trials) { Kokkos::ThreadVectorRange(team, 32), [&](const int& k, Scalar& max_) { const Scalar val = a((i * 32 + j) * 32 + k); - if (val > lmax) lmax = val; - if ((k == 11) && (j == 17) && (i == 2)) lmax = 11.5; + if (val > max_) max_ = val; + if ((k == 11) && (j == 17) && (i == 2)) max_ = 11.5; }, Kokkos::Max(t_max)); if (t_max > thread_max) thread_max = t_max; diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index eb058a982e1..5b91b30787a 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -35,6 +35,10 @@ IF (KOKKOS_ENABLE_OPENMP) ENDIF() ENDIF() +IF (KOKKOS_ENABLE_OPENMPTARGET) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/OpenMPTarget/*.cpp) +ENDIF() + IF (KOKKOS_ENABLE_PTHREAD) APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/Threads/*.cpp) IF (KOKKOS_ENABLE_ETI) @@ -42,6 +46,10 @@ IF (KOKKOS_ENABLE_PTHREAD) ENDIF() ENDIF() +IF (KOKKOS_ENABLE_HIP) + APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/HIP/*.cpp) +ENDIF() + IF (KOKKOS_ENABLE_HPX) APPEND_GLOB(KOKKOS_CORE_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/HPX/*.cpp) ENDIF() diff --git a/core/src/Cuda/Kokkos_CudaSpace.cpp b/core/src/Cuda/Kokkos_CudaSpace.cpp index 24be022d24c..e11961d763a 100644 --- a/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -147,7 +147,7 @@ void CudaSpace::access_error(const void *const) { /*--------------------------------------------------------------------------*/ bool CudaUVMSpace::available() { -#if defined(CUDA_VERSION) && (6000 <= CUDA_VERSION) && !defined(__APPLE__) +#if defined(CUDA_VERSION) && !defined(__APPLE__) enum { UVM_available = true }; #else enum { UVM_available = false }; @@ -219,19 +219,10 @@ void *CudaSpace::allocate(const size_t arg_alloc_size) const { void *CudaUVMSpace::allocate(const size_t arg_alloc_size) const { void *ptr = nullptr; - enum { max_uvm_allocations = 65536 }; - Cuda::impl_static_fence(); if (arg_alloc_size > 0) { Kokkos::Impl::num_uvm_allocations++; - if (Kokkos::Impl::num_uvm_allocations.load() > max_uvm_allocations) { - throw Experimental::CudaRawMemoryAllocationFailure( - arg_alloc_size, 1, - Experimental::RawMemoryAllocationFailure::FailureMode:: - MaximumCudaUVMAllocationsExceeded); - } - auto error_code = cudaMallocManaged(&ptr, arg_alloc_size, cudaMemAttachGlobal); @@ -360,7 +351,8 @@ SharedAllocationRecord::attach_texture_object( resDesc.res.linear.sizeInBytes = alloc_size; resDesc.res.linear.devPtr = alloc_ptr; - CUDA_SAFE_CALL(cudaCreateTextureObject(&tex_obj, &resDesc, &texDesc, NULL)); + CUDA_SAFE_CALL( + cudaCreateTextureObject(&tex_obj, &resDesc, &texDesc, nullptr)); return tex_obj; } @@ -797,6 +789,8 @@ SharedAllocationRecord // Iterate records to print orphaned memory ... void SharedAllocationRecord::print_records( std::ostream &s, const Kokkos::CudaSpace &, bool detail) { + (void)s; + (void)detail; #ifdef KOKKOS_DEBUG SharedAllocationRecord *r = &s_root_record; @@ -869,6 +863,8 @@ void SharedAllocationRecord::print_records( void SharedAllocationRecord::print_records( std::ostream &s, const Kokkos::CudaUVMSpace &, bool detail) { + (void)s; + (void)detail; #ifdef KOKKOS_DEBUG SharedAllocationRecord::print_host_accessible_records( s, "CudaUVM", &s_root_record, detail); @@ -881,6 +877,8 @@ void SharedAllocationRecord::print_records( void SharedAllocationRecord::print_records( std::ostream &s, const Kokkos::CudaHostPinnedSpace &, bool detail) { + (void)s; + (void)detail; #ifdef KOKKOS_DEBUG SharedAllocationRecord::print_host_accessible_records( s, "CudaHostPinned", &s_root_record, detail); @@ -895,7 +893,7 @@ void SharedAllocationRecord::print_records( //============================================================================== void *cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink) { - static void *ptr = NULL; + static void *ptr = nullptr; static std::int64_t current_size = 0; if (current_size == 0) { current_size = bytes; @@ -917,6 +915,27 @@ void *cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink) { return ptr; } +void cuda_prefetch_pointer(const Cuda &space, const void *ptr, size_t bytes, + bool to_device) { + if ((ptr == nullptr) || (bytes == 0)) return; + cudaPointerAttributes attr; + CUDA_SAFE_CALL(cudaPointerGetAttributes(&attr, ptr)); + // I measured this and it turns out prefetching towards the host slows + // DualView syncs down. Probably because the latency is not too bad in the + // first place for the pull down. If we want to change that provde + // cudaCpuDeviceId as the device if to_device is false +#if CUDA_VERSION < 10000 + bool is_managed = attr.isManaged; +#else + bool is_managed = attr.type == cudaMemoryTypeManaged; +#endif + if (to_device && is_managed && + space.cuda_device_prop().concurrentManagedAccess) { + CUDA_SAFE_CALL(cudaMemPrefetchAsync(ptr, bytes, space.cuda_device(), + space.cuda_stream())); + } +} + } // namespace Impl } // namespace Kokkos #else diff --git a/core/src/Cuda/Kokkos_Cuda_Error.hpp b/core/src/Cuda/Kokkos_Cuda_Error.hpp index 3b674bbb30d..01e60315ee0 100644 --- a/core/src/Cuda/Kokkos_Cuda_Error.hpp +++ b/core/src/Cuda/Kokkos_Cuda_Error.hpp @@ -58,10 +58,10 @@ namespace Impl { void cuda_device_synchronize(); void cuda_internal_error_throw(cudaError e, const char* name, - const char* file = NULL, const int line = 0); + const char* file = nullptr, const int line = 0); inline void cuda_internal_safe_call(cudaError e, const char* name, - const char* file = NULL, + const char* file = nullptr, const int line = 0) { if (cudaSuccess != e) { cuda_internal_error_throw(e, name, file, line); diff --git a/core/src/Cuda/Kokkos_Cuda_Instance.cpp b/core/src/Cuda/Kokkos_Cuda_Instance.cpp index b3c7edf67cd..37d0ffb687e 100644 --- a/core/src/Cuda/Kokkos_Cuda_Instance.cpp +++ b/core/src/Cuda/Kokkos_Cuda_Instance.cpp @@ -115,10 +115,14 @@ __global__ void query_cuda_kernel_arch(int *d_arch) { /** Query what compute capability is actually launched to the device: */ int cuda_kernel_arch() { - int *d_arch = 0; + int arch = 0; + int *d_arch = nullptr; + cudaMalloc((void **)&d_arch, sizeof(int)); + cudaMemcpy(d_arch, &arch, sizeof(int), cudaMemcpyDefault); + query_cuda_kernel_arch<<<1, 1>>>(d_arch); - int arch = 0; + cudaMemcpy(&arch, d_arch, sizeof(int), cudaMemcpyDefault); cudaFree(d_arch); return arch; @@ -313,6 +317,7 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream) { enum { WordSize = sizeof(size_type) }; +#ifndef KOKKOS_IMPL_TURN_OFF_CUDA_HOST_INIT_CHECK #ifdef KOKKOS_ENABLE_DEPRECATED_CODE if (!HostSpace::execution_space::is_initialized()) { #else @@ -323,6 +328,7 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream) { "initialized"); throw_runtime_exception(msg); } +#endif const CudaInternalDevices &dev_info = CudaInternalDevices::singleton(); @@ -340,7 +346,8 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream) { if (ok_init && ok_dev) { const struct cudaDeviceProp &cudaProp = dev_info.m_cudaProp[cuda_device_id]; - m_cudaDev = cuda_device_id; + m_cudaDev = cuda_device_id; + m_deviceProp = cudaProp; CUDA_SAFE_CALL(cudaSetDevice(m_cudaDev)); Kokkos::Impl::cuda_device_synchronize(); @@ -348,17 +355,24 @@ void CudaInternal::initialize(int cuda_device_id, cudaStream_t stream) { // Query what compute capability architecture a kernel executes: m_cudaArch = cuda_kernel_arch(); + if (m_cudaArch == 0) { + std::stringstream ss; + ss << "Kokkos::Cuda::initialize ERROR: likely mismatch of architecture" + << std::endl; + std::string msg = ss.str(); + Kokkos::abort(msg.c_str()); + } + int compiled_major = m_cudaArch / 100; int compiled_minor = (m_cudaArch % 100) / 10; - if (compiled_major < 5 && cudaProp.major >= 5) { + if (compiled_major != cudaProp.major || compiled_minor < cudaProp.minor) { std::stringstream ss; ss << "Kokkos::Cuda::initialize ERROR: running kernels compiled for " "compute capability " << compiled_major << "." << compiled_minor - << " (< 5.0) on device with compute capability " << cudaProp.major - << "." << cudaProp.minor - << " (>=5.0), this would give incorrect results!" << std::endl; + << " on device with compute capability " << cudaProp.major << "." + << cudaProp.minor << " is not supported by CUDA!" << std::endl; std::string msg = ss.str(); Kokkos::abort(msg.c_str()); } @@ -742,7 +756,7 @@ int Cuda::impl_is_initialized() void Cuda::initialize(const Cuda::SelectDevice config, size_t num_instances) #else void Cuda::impl_initialize(const Cuda::SelectDevice config, - size_t num_instances) + size_t /*num_instances*/) #endif { Impl::CudaInternal::singleton().initialize(config.cuda_device_id, 0); @@ -826,6 +840,9 @@ const char *Cuda::name() { return "Cuda"; } cudaStream_t Cuda::cuda_stream() const { return m_space_instance->m_stream; } int Cuda::cuda_device() const { return m_space_instance->m_cudaDev; } +const cudaDeviceProp &Cuda::cuda_device_prop() const { + return m_space_instance->m_deviceProp; +} } // namespace Kokkos diff --git a/core/src/Cuda/Kokkos_Cuda_Instance.hpp b/core/src/Cuda/Kokkos_Cuda_Instance.hpp index 9d2c939af85..2158f03dd58 100644 --- a/core/src/Cuda/Kokkos_Cuda_Instance.hpp +++ b/core/src/Cuda/Kokkos_Cuda_Instance.hpp @@ -97,6 +97,8 @@ class CudaInternal { int m_maxThreadsPerSM; int m_maxThreadsPerBlock; + cudaDeviceProp m_deviceProp; + mutable size_type m_scratchSpaceCount; mutable size_type m_scratchFlagsCount; mutable size_type m_scratchUnifiedCount; diff --git a/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp b/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp index 590fa7a7847..ca72b3b3021 100644 --- a/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp +++ b/core/src/Cuda/Kokkos_Cuda_KernelLaunch.hpp @@ -150,7 +150,7 @@ template __global__ static void cuda_parallel_launch_constant_or_global_memory( const DriverType* driver_ptr) { const DriverType& driver = - driver_ptr != NULL + driver_ptr != nullptr ? *driver_ptr : *((const DriverType*)kokkos_impl_cuda_constant_memory_buffer); @@ -162,7 +162,7 @@ __global__ __launch_bounds__(maxTperB, minBperSM) static void cuda_parallel_launch_constant_or_global_memory( const DriverType* driver_ptr) { const DriverType& driver = - driver_ptr != NULL + driver_ptr != nullptr ? *driver_ptr : *((const DriverType*)kokkos_impl_cuda_constant_memory_buffer); @@ -260,6 +260,8 @@ struct CudaParallelLaunch< (prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1))); } +#else + (void)prefer_shmem; #endif // Copy functor to constant memory on the device @@ -317,6 +319,8 @@ struct CudaParallelLaunch, (prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1))); } +#else + (void)prefer_shmem; #endif // Copy functor to constant memory on the device @@ -371,6 +375,8 @@ struct CudaParallelLaunch< (prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1))); } +#else + (void)prefer_shmem; #endif KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); @@ -419,6 +425,8 @@ struct CudaParallelLaunch, (prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1))); } +#else + (void)prefer_shmem; #endif KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); @@ -465,11 +473,13 @@ struct CudaParallelLaunch< (prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1))); } +#else + (void)prefer_shmem; #endif KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); - DriverType* driver_ptr = NULL; + DriverType* driver_ptr = nullptr; driver_ptr = reinterpret_cast( cuda_instance->scratch_functor(sizeof(DriverType))); cudaMemcpyAsync(driver_ptr, &driver, sizeof(DriverType), @@ -516,11 +526,13 @@ struct CudaParallelLaunch, (prefer_shmem ? cudaFuncCachePreferShared : cudaFuncCachePreferL1))); } +#else + (void)prefer_shmem; #endif KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE(); - DriverType* driver_ptr = NULL; + DriverType* driver_ptr = nullptr; driver_ptr = reinterpret_cast( cuda_instance->scratch_functor(sizeof(DriverType))); cudaMemcpyAsync(driver_ptr, &driver, sizeof(DriverType), diff --git a/core/src/Cuda/Kokkos_Cuda_Locks.hpp b/core/src/Cuda/Kokkos_Cuda_Locks.hpp index 84a9c3821e3..a4b5d08ccf6 100644 --- a/core/src/Cuda/Kokkos_Cuda_Locks.hpp +++ b/core/src/Cuda/Kokkos_Cuda_Locks.hpp @@ -113,10 +113,10 @@ __device__ #define CUDA_SPACE_ATOMIC_MASK 0x1FFFF -/// \brief Aquire a lock for the address +/// \brief Acquire a lock for the address /// -/// This function tries to aquire the lock for the hash value derived -/// from the provided ptr. If the lock is successfully aquired the +/// This function tries to acquire the lock for the hash value derived +/// from the provided ptr. If the lock is successfully acquired the /// function returns true. Otherwise it returns false. __device__ inline bool lock_address_cuda_space(void* ptr) { size_t offset = size_t(ptr); @@ -131,7 +131,7 @@ __device__ inline bool lock_address_cuda_space(void* ptr) { /// /// This function releases the lock for the hash value derived /// from the provided ptr. This function should only be called -/// after previously successfully aquiring a lock with +/// after previously successfully acquiring a lock with /// lock_address. __device__ inline void unlock_address_cuda_space(void* ptr) { size_t offset = size_t(ptr); diff --git a/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/core/src/Cuda/Kokkos_Cuda_Parallel.hpp index 4c5fe4b7f14..71ddadf74e2 100644 --- a/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ b/core/src/Cuda/Kokkos_Cuda_Parallel.hpp @@ -192,7 +192,7 @@ class TeamPolicyInternal } template - inline int team_size_max(const FunctorType& f, const ReducerType& r, + inline int team_size_max(const FunctorType& f, const ReducerType& /*r*/, const ParallelReduceTag&) const { using closure_type = Impl::ParallelReduce, @@ -304,7 +304,7 @@ class TeamPolicyInternal return m_thread_scratch_size[level]; } - inline typename traits::execution_space space() const { return m_space; } + const typename traits::execution_space& space() const { return m_space; } TeamPolicyInternal() : m_space(typename traits::execution_space()), @@ -325,13 +325,13 @@ class TeamPolicyInternal m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, m_chunk_size(32) { - // Make sure league size is permissable + // Make sure league size is permissible if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution " "space."); - // Make sure total block size is permissable + // Make sure total block size is permissible if (m_team_size * m_vector_length > 1024) { Impl::throw_runtime_exception( std::string("Kokkos::TeamPolicy< Cuda > the team size is too large. " @@ -351,7 +351,7 @@ class TeamPolicyInternal m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, m_chunk_size(32) { - // Make sure league size is permissable + // Make sure league size is permissible if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution " @@ -367,13 +367,13 @@ class TeamPolicyInternal m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, m_chunk_size(32) { - // Make sure league size is permissable + // Make sure league size is permissible if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution " "space."); - // Make sure total block size is permissable + // Make sure total block size is permissible if (m_team_size * m_vector_length > 1024) { Impl::throw_runtime_exception( std::string("Kokkos::TeamPolicy< Cuda > the team size is too large. " @@ -392,7 +392,7 @@ class TeamPolicyInternal m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, m_chunk_size(32) { - // Make sure league size is permissable + // Make sure league size is permissible if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count())) Impl::throw_runtime_exception( "Requested too large league_size for TeamPolicy on Cuda execution " @@ -895,10 +895,10 @@ class ParallelFor, // Functor's reduce memory, team scan memory, and team shared memory depend // upon team size. - m_scratch_ptr[0] = NULL; + m_scratch_ptr[0] = nullptr; m_scratch_ptr[1] = m_team_size <= 0 - ? NULL + ? nullptr : cuda_resize_scratch_space( static_cast(m_scratch_size[1]) * static_cast(Cuda::concurrency() / @@ -1207,7 +1207,7 @@ class ParallelReduce, ReducerType, ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, const ViewType& arg_result, typename std::enable_if::value, - void*>::type = NULL) + void*>::type = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), @@ -1499,7 +1499,7 @@ class ParallelReduce, ReducerType, ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, const ViewType& arg_result, typename std::enable_if::value, - void*>::type = NULL) + void*>::type = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), @@ -1810,7 +1810,7 @@ class ParallelReduce, ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, const ViewType& arg_result, typename std::enable_if::value, - void*>::type = NULL) + void*>::type = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), @@ -1824,7 +1824,7 @@ class ParallelReduce, m_team_begin(0), m_shmem_begin(0), m_shmem_size(0), - m_scratch_ptr{NULL, NULL}, + m_scratch_ptr{nullptr, nullptr}, m_league_size(arg_policy.league_size()), m_team_size(arg_policy.team_size()), m_vector_size(arg_policy.vector_length()) { @@ -1861,7 +1861,7 @@ class ParallelReduce, m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); m_scratch_ptr[1] = m_team_size <= 0 - ? NULL + ? nullptr : cuda_resize_scratch_space( static_cast(m_scratch_size[1]) * (static_cast(Cuda::concurrency() / @@ -1923,7 +1923,7 @@ class ParallelReduce, m_team_begin(0), m_shmem_begin(0), m_shmem_size(0), - m_scratch_ptr{NULL, NULL}, + m_scratch_ptr{nullptr, nullptr}, m_league_size(arg_policy.league_size()), m_team_size(arg_policy.team_size()), m_vector_size(arg_policy.vector_length()) { @@ -1960,7 +1960,7 @@ class ParallelReduce, m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); m_scratch_ptr[1] = m_team_size <= 0 - ? NULL + ? nullptr : cuda_resize_scratch_space( static_cast(m_scratch_size[1]) * static_cast(Cuda::concurrency() / diff --git a/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index 41431bfb8da..7d996fba043 100644 --- a/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -133,7 +133,8 @@ __device__ bool cuda_inter_block_reduction( typename FunctorValueTraits::reference_type value, typename FunctorValueTraits::reference_type neutral, const JoinOp& join, Cuda::size_type* const m_scratch_space, - typename FunctorValueTraits::pointer_type const result, + typename FunctorValueTraits::pointer_type const /*result*/, Cuda::size_type* const m_scratch_flags, const int max_active_thread = blockDim.y) { #ifdef __CUDA_ARCH__ @@ -236,6 +237,12 @@ __device__ bool cuda_inter_block_reduction( // "value" return last_block; #else + (void)value; + (void)neutral; + (void)join; + (void)m_scratch_space; + (void)m_scratch_flags; + (void)max_active_thread; return true; #endif } @@ -426,6 +433,10 @@ __device__ inline // "value" return last_block; #else + (void)reducer; + (void)m_scratch_space; + (void)m_scratch_flags; + (void)max_active_thread; return true; #endif } @@ -500,7 +511,7 @@ struct CudaReductionsFunctor { } __device__ static inline bool scalar_inter_block_reduction( - const FunctorType& functor, const Cuda::size_type block_id, + const FunctorType& functor, const Cuda::size_type /*block_id*/, const Cuda::size_type block_count, Cuda::size_type* const shared_data, Cuda::size_type* const global_data, Cuda::size_type* const global_flags) { Scalar* const global_team_buffer_element = ((Scalar*)global_data); @@ -577,7 +588,7 @@ struct CudaReductionsFunctor { __device__ static inline void scalar_intra_block_reduction( const FunctorType& functor, Scalar value, const bool skip, Scalar* result, - const int shared_elements, Scalar* shared_team_buffer_element) { + const int /*shared_elements*/, Scalar* shared_team_buffer_element) { const int warp_id = (threadIdx.y * blockDim.x) / 32; Scalar* const my_shared_team_buffer_element = shared_team_buffer_element + threadIdx.y * blockDim.x + threadIdx.x; @@ -601,7 +612,7 @@ struct CudaReductionsFunctor { } __device__ static inline bool scalar_inter_block_reduction( - const FunctorType& functor, const Cuda::size_type block_id, + const FunctorType& functor, const Cuda::size_type /*block_id*/, const Cuda::size_type block_count, Cuda::size_type* const shared_data, Cuda::size_type* const global_data, Cuda::size_type* const global_flags) { Scalar* const global_team_buffer_element = ((Scalar*)global_data); diff --git a/core/src/Cuda/Kokkos_Cuda_Task.hpp b/core/src/Cuda/Kokkos_Cuda_Task.hpp index 237d2430d6c..decbecc5e67 100644 --- a/core/src/Cuda/Kokkos_Cuda_Task.hpp +++ b/core/src/Cuda/Kokkos_Cuda_Task.hpp @@ -128,7 +128,7 @@ class TaskQueueSpecialization> { auto current_task = OptionalRef(); // Loop until all queues are empty and no tasks in flight - while (not queue.is_done()) { + while (!queue.is_done()) { if (warp_lane == 0) { // should be (?) same as team_exec.team_rank() == 0 // pop off a task current_task = diff --git a/core/src/Cuda/Kokkos_Cuda_Team.hpp b/core/src/Cuda/Kokkos_Cuda_Team.hpp index ac9ab9660cb..d9d5ed0bf32 100644 --- a/core/src/Cuda/Kokkos_Cuda_Team.hpp +++ b/core/src/Cuda/Kokkos_Cuda_Team.hpp @@ -164,6 +164,8 @@ class CudaTeamMember { template KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType& val, const int& thread_id) const { + (void)val; + (void)thread_id; #ifdef __CUDA_ARCH__ if (1 == blockDim.z) { // team == block __syncthreads(); @@ -184,6 +186,9 @@ class CudaTeamMember { template KOKKOS_INLINE_FUNCTION void team_broadcast(Closure const& f, ValueType& val, const int& thread_id) const { + (void)f; + (void)val; + (void)thread_id; #ifdef __CUDA_ARCH__ f(val); @@ -230,6 +235,8 @@ class CudaTeamMember { typename std::enable_if::value>::type team_reduce(ReducerType const& reducer, typename ReducerType::value_type& value) const noexcept { + (void)reducer; + (void)value; #ifdef __CUDA_ARCH__ cuda_intra_block_reduction(reducer, value, blockDim.y); #endif /* #ifdef __CUDA_ARCH__ */ @@ -274,6 +281,8 @@ class CudaTeamMember { return base_data[threadIdx.y]; #else + (void)value; + (void)global_accum; return Type(); #endif } @@ -302,6 +311,8 @@ class CudaTeamMember { typename std::enable_if::value>::type vector_reduce(ReducerType const& reducer, typename ReducerType::value_type& value) { + (void)reducer; + (void)value; #ifdef __CUDA_ARCH__ if (blockDim.x == 1) return; @@ -509,6 +520,11 @@ class CudaTeamMember { return 0; #else + (void)reducer; + (void)global_scratch_flags; + (void)global_scratch_space; + (void)shmem; + (void)shmem_size; return 0; #endif } @@ -683,6 +699,8 @@ KOKKOS_INLINE_FUNCTION void parallel_for( const Impl::TeamThreadRangeBoundariesStruct& loop_boundaries, const Closure& closure) { + (void)loop_boundaries; + (void)closure; #ifdef __CUDA_ARCH__ for (iType i = loop_boundaries.start + threadIdx.y; i < loop_boundaries.end; i += blockDim.y) @@ -706,6 +724,9 @@ KOKKOS_INLINE_FUNCTION parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember>& loop_boundaries, const Closure& closure, const ReducerType& reducer) { + (void)loop_boundaries; + (void)closure; + (void)reducer; #ifdef __CUDA_ARCH__ typename ReducerType::value_type value; reducer.init(value); @@ -734,6 +755,9 @@ KOKKOS_INLINE_FUNCTION parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember>& loop_boundaries, const Closure& closure, ValueType& result) { + (void)loop_boundaries; + (void)closure; + (void)result; #ifdef __CUDA_ARCH__ ValueType val; Kokkos::Sum reducer(val); @@ -755,6 +779,8 @@ KOKKOS_INLINE_FUNCTION void parallel_for( const Impl::TeamVectorRangeBoundariesStruct& loop_boundaries, const Closure& closure) { + (void)loop_boundaries; + (void)closure; #ifdef __CUDA_ARCH__ for (iType i = loop_boundaries.start + threadIdx.y * blockDim.x + threadIdx.x; i < loop_boundaries.end; i += blockDim.y * blockDim.x) @@ -768,6 +794,9 @@ KOKKOS_INLINE_FUNCTION parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< iType, Impl::CudaTeamMember>& loop_boundaries, const Closure& closure, const ReducerType& reducer) { + (void)loop_boundaries; + (void)closure; + (void)reducer; #ifdef __CUDA_ARCH__ typename ReducerType::value_type value; reducer.init(value); @@ -788,6 +817,9 @@ KOKKOS_INLINE_FUNCTION parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< iType, Impl::CudaTeamMember>& loop_boundaries, const Closure& closure, ValueType& result) { + (void)loop_boundaries; + (void)closure; + (void)result; #ifdef __CUDA_ARCH__ ValueType val; Kokkos::Sum reducer(val); @@ -818,6 +850,8 @@ KOKKOS_INLINE_FUNCTION void parallel_for( const Impl::ThreadVectorRangeBoundariesStruct& loop_boundaries, const Closure& closure) { + (void)loop_boundaries; + (void)closure; #ifdef __CUDA_ARCH__ for (iType i = loop_boundaries.start + threadIdx.x; i < loop_boundaries.end; i += blockDim.x) { @@ -853,6 +887,9 @@ KOKKOS_INLINE_FUNCTION parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< iType, Impl::CudaTeamMember> const& loop_boundaries, Closure const& closure, ReducerType const& reducer) { + (void)loop_boundaries; + (void)closure; + (void)reducer; #ifdef __CUDA_ARCH__ reducer.init(reducer.reference()); @@ -884,6 +921,9 @@ KOKKOS_INLINE_FUNCTION parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< iType, Impl::CudaTeamMember> const& loop_boundaries, Closure const& closure, ValueType& result) { + (void)loop_boundaries; + (void)closure; + (void)result; #ifdef __CUDA_ARCH__ result = ValueType(); @@ -912,6 +952,8 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( const Impl::ThreadVectorRangeBoundariesStruct& loop_boundaries, const Closure& closure) { + (void)loop_boundaries; + (void)closure; #ifdef __CUDA_ARCH__ // Extract value_type from closure @@ -986,6 +1028,7 @@ template KOKKOS_INLINE_FUNCTION void single( const Impl::VectorSingleStruct&, const FunctorType& lambda) { + (void)lambda; #ifdef __CUDA_ARCH__ if (threadIdx.x == 0) lambda(); #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK @@ -1003,6 +1046,7 @@ template KOKKOS_INLINE_FUNCTION void single( const Impl::ThreadSingleStruct&, const FunctorType& lambda) { + (void)lambda; #ifdef __CUDA_ARCH__ if (threadIdx.x == 0 && threadIdx.y == 0) lambda(); #ifdef KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK @@ -1020,6 +1064,8 @@ template KOKKOS_INLINE_FUNCTION void single( const Impl::VectorSingleStruct&, const FunctorType& lambda, ValueType& val) { + (void)lambda; + (void)val; #ifdef __CUDA_ARCH__ if (threadIdx.x == 0) lambda(val); unsigned mask = blockDim.x == 32 @@ -1034,6 +1080,9 @@ template KOKKOS_INLINE_FUNCTION void single( const Impl::ThreadSingleStruct& single_struct, const FunctorType& lambda, ValueType& val) { + (void)single_struct; + (void)lambda; + (void)val; #ifdef __CUDA_ARCH__ if (threadIdx.x == 0 && threadIdx.y == 0) { lambda(val); diff --git a/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp b/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp index a0de4eaa7f4..3b470edbc35 100644 --- a/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp +++ b/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp @@ -76,41 +76,17 @@ class UniqueToken { explicit UniqueToken(execution_space const& = execution_space()); #endif -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - KOKKOS_INLINE_FUNCTION - UniqueToken(const UniqueToken& rhs) - : m_buffer(rhs.m_buffer), m_count(rhs.m_count) {} - - KOKKOS_INLINE_FUNCTION - UniqueToken(UniqueToken&& rhs) - : m_buffer(std::move(rhs.m_buffer)), m_count(std::move(rhs.m_count)) {} - - KOKKOS_INLINE_FUNCTION - UniqueToken& operator=(const UniqueToken& rhs) { - m_buffer = rhs.m_buffer; - m_count = rhs.m_count; - return *this; - } - - KOKKOS_INLINE_FUNCTION - UniqueToken& operator=(UniqueToken&& rhs) { - m_buffer = std::move(rhs.m_buffer); - m_count = std::move(rhs.m_count); - return *this; - } -#else - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION UniqueToken(const UniqueToken&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION UniqueToken(UniqueToken&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION UniqueToken& operator=(const UniqueToken&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION UniqueToken& operator=(UniqueToken&&) = default; -#endif /// \brief upper bound for acquired values, i.e. 0 <= value < size() KOKKOS_INLINE_FUNCTION diff --git a/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp b/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp index 085262b8040..f75d2e56f7a 100644 --- a/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp +++ b/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp @@ -55,7 +55,7 @@ namespace Kokkos { namespace Impl { // Include all lanes -constexpr unsigned shfl_all_mask = 0xffffffff; +constexpr unsigned shfl_all_mask = 0xffffffffu; //---------------------------------------------------------------------------- // Shuffle operations require input to be a register (stack) variable @@ -71,20 +71,32 @@ struct in_place_shfl_op { return *static_cast(this); } - // sizeof(Scalar) == sizeof(int) case + // sizeof(Scalar) <= sizeof(int) case template // requires _assignable_from_bits - __device__ inline typename std::enable_if::type + __device__ inline typename std::enable_if::type operator()(Scalar& out, Scalar const& in, int lane_or_delta, int width, unsigned mask = shfl_all_mask) const noexcept { + using shfl_type = int; + union conv_type { + Scalar orig; + shfl_type conv; + }; + conv_type tmp_in; + tmp_in.orig = in; + conv_type tmp_out; + tmp_out.conv = tmp_in.conv; + conv_type res; //------------------------------------------------ - reinterpret_cast(out) = self().do_shfl_op( - mask, reinterpret_cast(in), lane_or_delta, width); + res.conv = self().do_shfl_op( + mask, reinterpret_cast(tmp_out.conv), lane_or_delta, + width); //------------------------------------------------ + out = res.orig; } // TODO: figure out why 64-bit shfl fails in Clang -#if (CUDA_VERSION >= 9000) && (!defined(KOKKOS_COMPILER_CLANG)) +#if !defined(KOKKOS_COMPILER_CLANG) // sizeof(Scalar) == sizeof(double) case // requires _assignable_from_bits template @@ -140,6 +152,10 @@ struct in_place_shfl_fn : in_place_shfl_op { __device__ KOKKOS_IMPL_FORCEINLINE T do_shfl_op(unsigned mask, T& val, int lane, int width) const noexcept { + (void)mask; + (void)val; + (void)lane; + (void)width; return KOKKOS_IMPL_CUDA_SHFL_MASK(mask, val, lane, width); } }; @@ -167,6 +183,10 @@ struct in_place_shfl_down_fn : in_place_shfl_op { __device__ KOKKOS_IMPL_FORCEINLINE T do_shfl_op(unsigned mask, T& val, int lane, int width) const noexcept { + (void)mask; + (void)val; + (void)lane; + (void)width; return KOKKOS_IMPL_CUDA_SHFL_DOWN_MASK(mask, val, lane, width); } }; diff --git a/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp b/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp index 66231e55f9c..0cdd84ce271 100644 --- a/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp +++ b/core/src/Cuda/Kokkos_Cuda_Version_9_8_Compatibility.hpp @@ -1,20 +1,6 @@ #include #if defined(__CUDA_ARCH__) -#if (CUDA_VERSION < 9000) -#define KOKKOS_IMPL_CUDA_ACTIVEMASK 0 -#define KOKKOS_IMPL_CUDA_SYNCWARP __threadfence_block() -#define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(m) \ - if (m) __threadfence_block() -#define KOKKOS_IMPL_CUDA_BALLOT(x) __ballot(x) -#define KOKKOS_IMPL_CUDA_BALLOT_MASK(m, x) __ballot(x) -#define KOKKOS_IMPL_CUDA_SHFL(x, y, z) __shfl(x, y, z) -#define KOKKOS_IMPL_CUDA_SHFL_MASK(m, x, y, z) __shfl(x, y, z) -#define KOKKOS_IMPL_CUDA_SHFL_UP(x, y, z) __shfl_up(x, y, z) -#define KOKKOS_IMPL_CUDA_SHFL_UP_MASK(m, x, y, z) __shfl_up(x, y, z) -#define KOKKOS_IMPL_CUDA_SHFL_DOWN(x, y, z) __shfl_down(x, y, z) -#define KOKKOS_IMPL_CUDA_SHFL_DOWN_MASK(m, x, y, z) __shfl_down(x, y, z) -#else #define KOKKOS_IMPL_CUDA_ACTIVEMASK __activemask() #define KOKKOS_IMPL_CUDA_SYNCWARP __syncwarp(0xffffffff) #define KOKKOS_IMPL_CUDA_SYNCWARP_MASK(m) __syncwarp(m) @@ -27,7 +13,6 @@ #define KOKKOS_IMPL_CUDA_SHFL_DOWN(x, y, z) \ __shfl_down_sync(0xffffffff, x, y, z) #define KOKKOS_IMPL_CUDA_SHFL_DOWN_MASK(m, x, y, z) __shfl_down_sync(m, x, y, z) -#endif #else #define KOKKOS_IMPL_CUDA_ACTIVEMASK 0 #define KOKKOS_IMPL_CUDA_SYNCWARP @@ -41,25 +26,13 @@ #define KOKKOS_IMPL_CUDA_SHFL_DOWN_MASK(m, x, y, z) 0 #endif -#if (CUDA_VERSION >= 9000) && (!defined(KOKKOS_COMPILER_CLANG)) +#if !defined(KOKKOS_COMPILER_CLANG) #define KOKKOS_IMPL_CUDA_MAX_SHFL_SIZEOF sizeof(long long) #else #define KOKKOS_IMPL_CUDA_MAX_SHFL_SIZEOF sizeof(int) #endif #if defined(__CUDA_ARCH__) -#if (CUDA_VERSION < 9000) -#define KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN(MSG) \ - { \ - const unsigned b = __ballot(1); \ - if (b != 0xffffffff) { \ - printf(" SYNCWARP AT %s (%d,%d,%d) (%d,%d,%d) failed %x\n", MSG, \ - blockIdx.x, blockIdx.y, blockIdx.z, threadIdx.x, threadIdx.y, \ - threadIdx.z, b); \ - return; \ - } \ - } -#else #define KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN(MSG) \ { \ __syncwarp(); \ @@ -71,7 +44,6 @@ return; \ } \ } -#endif #else #define KOKKOS_IMPL_CUDA_SYNCWARP_OR_RETURN(MSG) #endif diff --git a/core/src/Cuda/Kokkos_Cuda_View.hpp b/core/src/Cuda/Kokkos_Cuda_View.hpp index 08fdbea3872..364f334a4cd 100644 --- a/core/src/Cuda/Kokkos_Cuda_View.hpp +++ b/core/src/Cuda/Kokkos_Cuda_View.hpp @@ -84,8 +84,8 @@ struct CudaTextureFetch { KOKKOS_INLINE_FUNCTION CudaTextureFetch() : m_obj(), m_ptr(), m_offset() {} - KOKKOS_INLINE_FUNCTION - ~CudaTextureFetch() {} + KOKKOS_DEFAULTED_FUNCTION + ~CudaTextureFetch() = default; KOKKOS_INLINE_FUNCTION CudaTextureFetch(const CudaTextureFetch& rhs) @@ -153,8 +153,8 @@ struct CudaLDGFetch { KOKKOS_INLINE_FUNCTION CudaLDGFetch() : m_ptr() {} - KOKKOS_INLINE_FUNCTION - ~CudaLDGFetch() {} + KOKKOS_DEFAULTED_FUNCTION + ~CudaLDGFetch() = default; KOKKOS_INLINE_FUNCTION CudaLDGFetch(const CudaLDGFetch& rhs) : m_ptr(rhs.m_ptr) {} @@ -250,7 +250,7 @@ class ViewDataHandle< KOKKOS_INLINE_FUNCTION static handle_type assign(value_type* arg_data_ptr, track_type const& arg_tracker) { - if (arg_data_ptr == NULL) return handle_type(); + if (arg_data_ptr == nullptr) return handle_type(); #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) // Assignment of texture = non-texture requires creation of a texture object @@ -273,6 +273,7 @@ class ViewDataHandle< return handle_type(arg_data_ptr, r); #else + (void)arg_tracker; Kokkos::Impl::cuda_abort( "Cannot create Cuda texture object from within a Cuda kernel"); return handle_type(); diff --git a/core/src/Cuda/Kokkos_Cuda_fwd.hpp b/core/src/Cuda/Kokkos_Cuda_fwd.hpp new file mode 100644 index 00000000000..4bda5e9411f --- /dev/null +++ b/core/src/Cuda/Kokkos_Cuda_fwd.hpp @@ -0,0 +1,67 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDA_FWD_HPP_ +#define KOKKOS_CUDA_FWD_HPP_ +#if defined(KOKKOS_ENABLE_CUDA) +namespace Kokkos { + +class CudaSpace; ///< Memory space on Cuda GPU +class CudaUVMSpace; ///< Memory space on Cuda GPU with UVM +class CudaHostPinnedSpace; ///< Memory space on Host accessible to Cuda GPU +class Cuda; ///< Execution space for Cuda GPU + +namespace Impl { + +template +void cuda_prefetch_pointer(const ExecSpace& /*space*/, const void* /*ptr*/, + size_t /*bytes*/, bool /*to_device*/) {} + +void cuda_prefetch_pointer(const Cuda& space, const void* ptr, size_t bytes, + bool to_device); + +} // namespace Impl +} // namespace Kokkos +#endif +#endif diff --git a/core/src/HIP/KokkosExp_HIP_IterateTile.hpp b/core/src/HIP/KokkosExp_HIP_IterateTile.hpp new file mode 100644 index 00000000000..e2525d70c17 --- /dev/null +++ b/core/src/HIP/KokkosExp_HIP_IterateTile.hpp @@ -0,0 +1,3296 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIP_EXP_ITERATE_TILE_REFACTOR_HPP +#define KOKKOS_HIP_EXP_ITERATE_TILE_REFACTOR_HPP + +#include +#if defined(__HIPCC__) + +#include +#include +#include + +#include + +#if defined(KOKKOS_ENABLE_PROFILING) +#include +#include +#endif + +namespace Kokkos { +namespace Impl { + +// ------------------------------------------------------------------ // +// ParallelFor iteration pattern +template +struct DeviceIterateTile; + +// Rank 2 +// Specializations for void tag type +template +struct DeviceIterateTile<2, PolicyType, Functor, void> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& policy_, const Functor& f_) + : m_policy(policy_), m_func(f_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (index_type tile_id1 = static_cast(hipBlockIdx_y); + tile_id1 < m_policy.m_tile_end[1]; tile_id1 += hipGridDim_y) { + const index_type offset_1 = + tile_id1 * m_policy.m_tile[1] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + static_cast(hipThreadIdx_y) < m_policy.m_tile[1]) { + for (index_type tile_id0 = static_cast(hipBlockIdx_x); + tile_id0 < m_policy.m_tile_end[0]; tile_id0 += hipGridDim_x) { + const index_type offset_0 = + tile_id0 * m_policy.m_tile[0] + + static_cast(hipThreadIdx_x) + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + static_cast(hipThreadIdx_x) < m_policy.m_tile[0]) { + m_func(offset_0, offset_1); + } + } + } + } + } + // LR + else { + for (index_type tile_id0 = static_cast(hipBlockIdx_x); + tile_id0 < m_policy.m_tile_end[0]; tile_id0 += hipGridDim_x) { + const index_type offset_0 = + tile_id0 * m_policy.m_tile[0] + + static_cast(hipThreadIdx_x) + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + static_cast(hipThreadIdx_x) < m_policy.m_tile[0]) { + for (index_type tile_id1 = static_cast(hipBlockIdx_y); + tile_id1 < m_policy.m_tile_end[1]; tile_id1 += hipGridDim_y) { + const index_type offset_1 = + tile_id1 * m_policy.m_tile[1] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + static_cast(hipThreadIdx_y) < m_policy.m_tile[1]) { + m_func(offset_0, offset_1); + } + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; +}; + +// Specializations for tag type +template +struct DeviceIterateTile<2, PolicyType, Functor, Tag> { + using index_type = typename PolicyType::index_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_) + : m_policy(policy_), m_func(f_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (PolicyType::inner_direction == PolicyType::Left) { + // Loop over size maxnumblocks until full range covered + for (index_type tile_id1 = static_cast(hipBlockIdx_y); + tile_id1 < m_policy.m_tile_end[1]; tile_id1 += hipGridDim_y) { + const index_type offset_1 = + tile_id1 * m_policy.m_tile[1] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + static_cast(hipThreadIdx_y) < m_policy.m_tile[1]) { + for (index_type tile_id0 = static_cast(hipBlockIdx_x); + tile_id0 < m_policy.m_tile_end[0]; tile_id0 += hipGridDim_x) { + const index_type offset_0 = + tile_id0 * m_policy.m_tile[0] + + static_cast(hipThreadIdx_x) + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + static_cast(hipThreadIdx_x) < m_policy.m_tile[0]) { + m_func(Tag(), offset_0, offset_1); + } + } + } + } + } else { + for (index_type tile_id0 = static_cast(hipBlockIdx_x); + tile_id0 < m_policy.m_tile_end[0]; tile_id0 += hipGridDim_x) { + const index_type offset_0 = + tile_id0 * m_policy.m_tile[0] + + static_cast(hipThreadIdx_x) + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + static_cast(hipThreadIdx_x) < m_policy.m_tile[0]) { + for (index_type tile_id1 = static_cast(hipBlockIdx_y); + tile_id1 < m_policy.m_tile_end[1]; tile_id1 += hipGridDim_y) { + const index_type offset_1 = + tile_id1 * m_policy.m_tile[1] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + static_cast(hipThreadIdx_y) < m_policy.m_tile[1]) { + m_func(Tag(), offset_0, offset_1); + } + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; +}; + +// Rank 3 +// Specializations for void tag type +template +struct DeviceIterateTile<3, PolicyType, Functor, void> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& policy_, const Functor& f_) + : m_policy(policy_), m_func(f_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (index_type tile_id2 = static_cast(hipBlockIdx_z); + tile_id2 < m_policy.m_tile_end[2]; tile_id2 += hipGridDim_z) { + const index_type offset_2 = + tile_id2 * m_policy.m_tile[2] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + static_cast(hipThreadIdx_z) < m_policy.m_tile[2]) { + for (index_type tile_id1 = static_cast(hipBlockIdx_y); + tile_id1 < m_policy.m_tile_end[1]; tile_id1 += hipGridDim_y) { + const index_type offset_1 = + tile_id1 * m_policy.m_tile[1] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + static_cast(hipThreadIdx_y) < m_policy.m_tile[1]) { + for (index_type tile_id0 = static_cast(hipBlockIdx_x); + tile_id0 < m_policy.m_tile_end[0]; + tile_id0 += hipGridDim_x) { + const index_type offset_0 = + tile_id0 * m_policy.m_tile[0] + + static_cast(hipThreadIdx_x) + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + static_cast(hipThreadIdx_x) < + m_policy.m_tile[0]) { + m_func(offset_0, offset_1, offset_2); + } + } + } + } + } + } + } + // LR + else { + for (index_type tile_id0 = static_cast(hipBlockIdx_x); + tile_id0 < m_policy.m_tile_end[0]; tile_id0 += hipGridDim_x) { + const index_type offset_0 = + tile_id0 * m_policy.m_tile[0] + + static_cast(hipThreadIdx_x) + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + static_cast(hipThreadIdx_x) < m_policy.m_tile[0]) { + for (index_type tile_id1 = static_cast(hipBlockIdx_y); + tile_id1 < m_policy.m_tile_end[1]; tile_id1 += hipGridDim_y) { + const index_type offset_1 = + tile_id1 * m_policy.m_tile[1] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + static_cast(hipThreadIdx_y) < m_policy.m_tile[1]) { + for (index_type tile_id2 = static_cast(hipBlockIdx_z); + tile_id2 < m_policy.m_tile_end[2]; + tile_id2 += hipGridDim_z) { + const index_type offset_2 = + tile_id2 * m_policy.m_tile[2] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + static_cast(hipThreadIdx_z) < + m_policy.m_tile[2]) { + m_func(offset_0, offset_1, offset_2); + } + } + } + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; +}; + +// Specializations for void tag type +template +struct DeviceIterateTile<3, PolicyType, Functor, Tag> { + using index_type = typename PolicyType::index_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_) + : m_policy(policy_), m_func(f_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (PolicyType::inner_direction == PolicyType::Left) { + for (index_type tile_id2 = static_cast(hipBlockIdx_z); + tile_id2 < m_policy.m_tile_end[2]; tile_id2 += hipGridDim_z) { + const index_type offset_2 = + tile_id2 * m_policy.m_tile[2] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + static_cast(hipThreadIdx_z) < m_policy.m_tile[2]) { + for (index_type tile_id1 = static_cast(hipBlockIdx_y); + tile_id1 < m_policy.m_tile_end[1]; tile_id1 += hipGridDim_y) { + const index_type offset_1 = + tile_id1 * m_policy.m_tile[1] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + static_cast(hipThreadIdx_y) < m_policy.m_tile[1]) { + for (index_type tile_id0 = static_cast(hipBlockIdx_x); + tile_id0 < m_policy.m_tile_end[0]; + tile_id0 += hipGridDim_x) { + const index_type offset_0 = + tile_id0 * m_policy.m_tile[0] + + static_cast(hipThreadIdx_x) + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + static_cast(hipThreadIdx_x) < + m_policy.m_tile[0]) { + m_func(Tag(), offset_0, offset_1, offset_2); + } + } + } + } + } + } + } else { + for (index_type tile_id0 = static_cast(hipBlockIdx_x); + tile_id0 < m_policy.m_tile_end[0]; tile_id0 += hipGridDim_x) { + const index_type offset_0 = + tile_id0 * m_policy.m_tile[0] + + static_cast(hipThreadIdx_x) + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + static_cast(hipThreadIdx_x) < m_policy.m_tile[0]) { + for (index_type tile_id1 = static_cast(hipBlockIdx_y); + tile_id1 < m_policy.m_tile_end[1]; tile_id1 += hipGridDim_y) { + const index_type offset_1 = + tile_id1 * m_policy.m_tile[1] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + static_cast(hipThreadIdx_y) < m_policy.m_tile[1]) { + for (index_type tile_id2 = static_cast(hipBlockIdx_z); + tile_id2 < m_policy.m_tile_end[2]; + tile_id2 += hipGridDim_z) { + const index_type offset_2 = + tile_id2 * m_policy.m_tile[2] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + static_cast(hipThreadIdx_z) < + m_policy.m_tile[2]) { + m_func(Tag(), offset_0, offset_1, offset_2); + } + } + } + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; +}; + +// Rank 4 +// Specializations for void tag type +template +struct DeviceIterateTile<4, PolicyType, Functor, void> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& policy_, const Functor& f_) + : m_policy(policy_), m_func(f_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + const index_type temp0 = m_policy.m_tile_end[0]; + const index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl1 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl0) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) % numbl0; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) / numbl0; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[0]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[0]; + + for (index_type tile_id3 = static_cast(hipBlockIdx_z); + tile_id3 < m_policy.m_tile_end[3]; tile_id3 += hipGridDim_z) { + const index_type offset_3 = + tile_id3 * m_policy.m_tile[3] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + static_cast(hipThreadIdx_z) < m_policy.m_tile[3]) { + for (index_type tile_id2 = static_cast(hipBlockIdx_y); + tile_id2 < m_policy.m_tile_end[2]; tile_id2 += hipGridDim_y) { + const index_type offset_2 = + tile_id2 * m_policy.m_tile[2] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + static_cast(hipThreadIdx_y) < m_policy.m_tile[2]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; + i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + thr_id0 < m_policy.m_tile[0]) { + m_func(offset_0, offset_1, offset_2, offset_3); + } + } + } + } + } + } + } + } + } + // LR + else { + const index_type temp0 = m_policy.m_tile_end[0]; + const index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl0 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl1) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) / numbl1; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) % numbl1; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[1]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[1]; + + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && thr_id0 < m_policy.m_tile[0]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type tile_id2 = static_cast(hipBlockIdx_y); + tile_id2 < m_policy.m_tile_end[2]; + tile_id2 += hipGridDim_y) { + const index_type offset_2 = + tile_id2 * m_policy.m_tile[2] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + static_cast(hipThreadIdx_y) < + m_policy.m_tile[2]) { + for (index_type tile_id3 = + static_cast(hipBlockIdx_z); + tile_id3 < m_policy.m_tile_end[3]; + tile_id3 += hipGridDim_z) { + const index_type offset_3 = + tile_id3 * m_policy.m_tile[3] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + static_cast(hipThreadIdx_z) < + m_policy.m_tile[3]) { + m_func(offset_0, offset_1, offset_2, offset_3); + } + } + } + } + } + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; +}; + +// Specializations for void tag type +template +struct DeviceIterateTile<4, PolicyType, Functor, Tag> { + using index_type = typename PolicyType::index_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_) + : m_policy(policy_), m_func(f_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (PolicyType::inner_direction == PolicyType::Left) { + const index_type temp0 = m_policy.m_tile_end[0]; + const index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl1 = + (temp0 * temp1 > max_blocks + ? static_cast(max_blocks / numbl0) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) % numbl0; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) / numbl0; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[0]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[0]; + + for (index_type tile_id3 = static_cast(hipBlockIdx_z); + tile_id3 < m_policy.m_tile_end[3]; tile_id3 += hipGridDim_z) { + const index_type offset_3 = + tile_id3 * m_policy.m_tile[3] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + static_cast(hipThreadIdx_z) < m_policy.m_tile[3]) { + for (index_type tile_id2 = static_cast(hipBlockIdx_y); + tile_id2 < m_policy.m_tile_end[2]; tile_id2 += hipGridDim_y) { + const index_type offset_2 = + tile_id2 * m_policy.m_tile[2] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + static_cast(hipThreadIdx_y) < m_policy.m_tile[2]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; + i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + thr_id0 < m_policy.m_tile[0]) { + m_func(Tag(), offset_0, offset_1, offset_2, offset_3); + } + } + } + } + } + } + } + } + } else { + const index_type temp0 = m_policy.m_tile_end[0]; + const index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl0 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl1) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) / numbl1; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) % numbl1; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[1]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[1]; + + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && thr_id0 < m_policy.m_tile[0]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + tile_id1 * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type tile_id2 = static_cast(hipBlockIdx_y); + tile_id2 < m_policy.m_tile_end[2]; + tile_id2 += hipGridDim_y) { + const index_type offset_2 = + tile_id2 * m_policy.m_tile[2] + + static_cast(hipThreadIdx_y) + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + static_cast(hipThreadIdx_y) < + m_policy.m_tile[2]) { + for (index_type tile_id3 = + static_cast(hipBlockIdx_z); + tile_id3 < m_policy.m_tile_end[3]; + tile_id3 += hipGridDim_z) { + const index_type offset_3 = + tile_id3 * m_policy.m_tile[3] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + static_cast(hipThreadIdx_z) < + m_policy.m_tile[3]) { + m_func(Tag(), offset_0, offset_1, offset_2, offset_3); + } + } + } + } + } + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; +}; + +// Rank 5 +// Specializations for void tag type +template +struct DeviceIterateTile<5, PolicyType, Functor, void> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& policy_, const Functor& f_) + : m_policy(policy_), m_func(f_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + index_type temp0 = m_policy.m_tile_end[0]; + index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl1 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl0) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) % numbl0; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) / numbl0; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[0]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[0]; + + temp0 = m_policy.m_tile_end[2]; + temp1 = m_policy.m_tile_end[3]; + const index_type numbl2 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl3 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl2) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id2 = + static_cast(hipBlockIdx_y) % numbl2; + const index_type tile_id3 = + static_cast(hipBlockIdx_y) / numbl2; + const index_type thr_id2 = + static_cast(hipThreadIdx_y) % m_policy.m_tile[2]; + const index_type thr_id3 = + static_cast(hipThreadIdx_y) / m_policy.m_tile[2]; + + for (index_type tile_id4 = static_cast(hipBlockIdx_z); + tile_id4 < m_policy.m_tile_end[4]; tile_id4 += hipGridDim_z) { + const index_type offset_4 = + tile_id4 * m_policy.m_tile[4] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[4]); + if (offset_4 < m_policy.m_upper[4] && + static_cast(hipThreadIdx_z) < m_policy.m_tile[4]) { + for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; + l += numbl3) { + const index_type offset_3 = + l * m_policy.m_tile[3] + thr_id3 + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + thr_id3 < m_policy.m_tile[3]) { + for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; + k += numbl2) { + const index_type offset_2 = + k * m_policy.m_tile[2] + thr_id2 + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + thr_id2 < m_policy.m_tile[2]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; + i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + thr_id0 < m_policy.m_tile[0]) { + m_func(offset_0, offset_1, offset_2, offset_3, + offset_4); + } + } + } + } + } + } + } + } + } + } + } + // LR + else { + index_type temp0 = m_policy.m_tile_end[0]; + index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl0 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl1) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) / numbl1; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) % numbl1; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[1]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[1]; + + temp0 = m_policy.m_tile_end[2]; + temp1 = m_policy.m_tile_end[3]; + const index_type numbl3 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl2 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl3) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id2 = + static_cast(hipBlockIdx_y) / numbl3; + const index_type tile_id3 = + static_cast(hipBlockIdx_y) % numbl3; + const index_type thr_id2 = + static_cast(hipThreadIdx_y) / m_policy.m_tile[3]; + const index_type thr_id3 = + static_cast(hipThreadIdx_y) % m_policy.m_tile[3]; + + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && thr_id0 < m_policy.m_tile[0]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; + k += numbl2) { + const index_type offset_2 = + k * m_policy.m_tile[2] + thr_id2 + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + thr_id2 < m_policy.m_tile[2]) { + for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; + l += numbl3) { + const index_type offset_3 = + l * m_policy.m_tile[3] + thr_id3 + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + thr_id3 < m_policy.m_tile[3]) { + for (index_type tile_id4 = + static_cast(hipBlockIdx_z); + tile_id4 < m_policy.m_tile_end[4]; + tile_id4 += hipGridDim_z) { + const index_type offset_4 = + tile_id4 * m_policy.m_tile[4] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[4]); + if (offset_4 < m_policy.m_upper[4] && + static_cast(hipThreadIdx_z) < + m_policy.m_tile[4]) { + m_func(offset_0, offset_1, offset_2, offset_3, + offset_4); + } + } + } + } + } + } + } + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; +}; + +// Specializations for tag type +template +struct DeviceIterateTile<5, PolicyType, Functor, Tag> { + using index_type = typename PolicyType::index_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_) + : m_policy(policy_), m_func(f_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + index_type temp0 = m_policy.m_tile_end[0]; + index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl1 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl0) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) % numbl0; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) / numbl0; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[0]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[0]; + + temp0 = m_policy.m_tile_end[2]; + temp1 = m_policy.m_tile_end[3]; + const index_type numbl2 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl3 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl2) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id2 = + static_cast(hipBlockIdx_y) % numbl2; + const index_type tile_id3 = + static_cast(hipBlockIdx_y) / numbl2; + const index_type thr_id2 = + static_cast(hipThreadIdx_y) % m_policy.m_tile[2]; + const index_type thr_id3 = + static_cast(hipThreadIdx_y) / m_policy.m_tile[2]; + + for (index_type tile_id4 = static_cast(hipBlockIdx_z); + tile_id4 < m_policy.m_tile_end[4]; tile_id4 += hipGridDim_z) { + const index_type offset_4 = + tile_id4 * m_policy.m_tile[4] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[4]); + if (offset_4 < m_policy.m_upper[4] && + static_cast(hipThreadIdx_z) < m_policy.m_tile[4]) { + for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; + l += numbl3) { + const index_type offset_3 = + l * m_policy.m_tile[3] + thr_id3 + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + thr_id3 < m_policy.m_tile[3]) { + for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; + k += numbl2) { + const index_type offset_2 = + k * m_policy.m_tile[2] + thr_id2 + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + thr_id2 < m_policy.m_tile[2]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; + i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + thr_id0 < m_policy.m_tile[0]) { + m_func(Tag(), offset_0, offset_1, offset_2, offset_3, + offset_4); + } + } + } + } + } + } + } + } + } + } + } + // LR + else { + index_type temp0 = m_policy.m_tile_end[0]; + index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl0 = + (temp0 * temp1 > max_blocks + ? static_cast(max_blocks / numbl1) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) / numbl1; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) % numbl1; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[1]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[1]; + + temp0 = m_policy.m_tile_end[2]; + temp1 = m_policy.m_tile_end[3]; + const index_type numbl3 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl2 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl3) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id2 = + static_cast(hipBlockIdx_y) / numbl3; + const index_type tile_id3 = + static_cast(hipBlockIdx_y) % numbl3; + const index_type thr_id2 = + static_cast(hipThreadIdx_y) / m_policy.m_tile[3]; + const index_type thr_id3 = + static_cast(hipThreadIdx_y) % m_policy.m_tile[3]; + + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && thr_id0 < m_policy.m_tile[0]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; + k += numbl2) { + const index_type offset_2 = + k * m_policy.m_tile[2] + thr_id2 + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + thr_id2 < m_policy.m_tile[2]) { + for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; + l += numbl3) { + const index_type offset_3 = + l * m_policy.m_tile[3] + thr_id3 + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + thr_id3 < m_policy.m_tile[3]) { + for (index_type tile_id4 = + static_cast(hipBlockIdx_z); + tile_id4 < m_policy.m_tile_end[4]; + tile_id4 += hipGridDim_z) { + const index_type offset_4 = + tile_id4 * m_policy.m_tile[4] + + static_cast(hipThreadIdx_z) + + static_cast(m_policy.m_lower[4]); + if (offset_4 < m_policy.m_upper[4] && + static_cast(hipThreadIdx_z) < + m_policy.m_tile[4]) { + m_func(Tag(), offset_0, offset_1, offset_2, offset_3, + offset_4); + } + } + } + } + } + } + } + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; +}; + +// Rank 6 +// Specializations for void tag type +template +struct DeviceIterateTile<6, PolicyType, Functor, void> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& rp_, const Functor& f_) + : m_policy(rp_), m_func(f_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + index_type temp0 = m_policy.m_tile_end[0]; + index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl1 = + (temp0 * temp1 > max_blocks + ? static_cast(max_blocks / numbl0) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) % numbl0; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) / numbl0; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[0]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[0]; + + temp0 = m_policy.m_tile_end[2]; + temp1 = m_policy.m_tile_end[3]; + const index_type numbl2 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl3 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl2) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id2 = + static_cast(hipBlockIdx_y) % numbl2; + const index_type tile_id3 = + static_cast(hipBlockIdx_y) / numbl2; + const index_type thr_id2 = + static_cast(hipThreadIdx_y) % m_policy.m_tile[2]; + const index_type thr_id3 = + static_cast(hipThreadIdx_y) / m_policy.m_tile[2]; + + temp0 = m_policy.m_tile_end[4]; + temp1 = m_policy.m_tile_end[5]; + const index_type numbl4 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl5 = + (temp0 * temp1 > max_blocks + ? static_cast(max_blocks / numbl4) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id4 = + static_cast(hipBlockIdx_z) % numbl4; + const index_type tile_id5 = + static_cast(hipBlockIdx_z) / numbl4; + const index_type thr_id4 = + static_cast(hipThreadIdx_z) % m_policy.m_tile[4]; + const index_type thr_id5 = + static_cast(hipThreadIdx_z) / m_policy.m_tile[4]; + + for (index_type n = tile_id5; n < m_policy.m_tile_end[5]; n += numbl5) { + const index_type offset_5 = + n * m_policy.m_tile[5] + thr_id5 + + static_cast(m_policy.m_lower[5]); + if (offset_5 < m_policy.m_upper[5] && thr_id5 < m_policy.m_tile[5]) { + for (index_type m = tile_id4; m < m_policy.m_tile_end[4]; + m += numbl4) { + const index_type offset_4 = + m * m_policy.m_tile[4] + thr_id4 + + static_cast(m_policy.m_lower[4]); + if (offset_4 < m_policy.m_upper[4] && + thr_id4 < m_policy.m_tile[4]) { + for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; + l += numbl3) { + const index_type offset_3 = + l * m_policy.m_tile[3] + thr_id3 + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + thr_id3 < m_policy.m_tile[3]) { + for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; + k += numbl2) { + const index_type offset_2 = + k * m_policy.m_tile[2] + thr_id2 + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + thr_id2 < m_policy.m_tile[2]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type i = tile_id0; + i < m_policy.m_tile_end[0]; i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + thr_id0 < m_policy.m_tile[0]) { + m_func(offset_0, offset_1, offset_2, offset_3, + offset_4, offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } + // LR + else { + index_type temp0 = m_policy.m_tile_end[0]; + index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl0 = + (temp0 * temp1 > max_blocks + ? static_cast(max_blocks / numbl1) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) / numbl1; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) % numbl1; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[1]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[1]; + + temp0 = m_policy.m_tile_end[2]; + temp1 = m_policy.m_tile_end[3]; + const index_type numbl3 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl2 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl3) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id2 = + static_cast(hipBlockIdx_y) / numbl3; + const index_type tile_id3 = + static_cast(hipBlockIdx_y) % numbl3; + const index_type thr_id2 = + static_cast(hipThreadIdx_y) / m_policy.m_tile[3]; + const index_type thr_id3 = + static_cast(hipThreadIdx_y) % m_policy.m_tile[3]; + + temp0 = m_policy.m_tile_end[4]; + temp1 = m_policy.m_tile_end[5]; + const index_type numbl5 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl4 = + (temp0 * temp1 > max_blocks + ? index_type(max_blocks / numbl5) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id4 = + static_cast(hipBlockIdx_z) / numbl5; + const index_type tile_id5 = + static_cast(hipBlockIdx_z) % numbl5; + const index_type thr_id4 = + static_cast(hipThreadIdx_z) / m_policy.m_tile[5]; + const index_type thr_id5 = + static_cast(hipThreadIdx_z) % m_policy.m_tile[5]; + + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && thr_id0 < m_policy.m_tile[0]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; + k += numbl2) { + const index_type offset_2 = + k * m_policy.m_tile[2] + thr_id2 + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + thr_id2 < m_policy.m_tile[2]) { + for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; + l += numbl3) { + const index_type offset_3 = + l * m_policy.m_tile[3] + thr_id3 + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + thr_id3 < m_policy.m_tile[3]) { + for (index_type m = tile_id4; m < m_policy.m_tile_end[4]; + m += numbl4) { + const index_type offset_4 = + m * m_policy.m_tile[4] + thr_id4 + + static_cast(m_policy.m_lower[4]); + if (offset_4 < m_policy.m_upper[4] && + thr_id4 < m_policy.m_tile[4]) { + for (index_type n = tile_id5; + n < m_policy.m_tile_end[5]; n += numbl5) { + const index_type offset_5 = + n * m_policy.m_tile[5] + thr_id5 + + static_cast(m_policy.m_lower[5]); + if (offset_5 < m_policy.m_upper[5] && + thr_id5 < m_policy.m_tile[5]) { + m_func(offset_0, offset_1, offset_2, offset_3, + offset_4, offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; +}; + +// Specializations for tag type +template +struct DeviceIterateTile<6, PolicyType, Functor, Tag> { + using index_type = typename PolicyType::index_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_) + : m_policy(policy_), m_func(f_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + index_type temp0 = m_policy.m_tile_end[0]; + index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl0 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl1 = + (temp0 * temp1 > max_blocks + ? static_cast(max_blocks / numbl0) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) % numbl0; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) / numbl0; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[0]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[0]; + + temp0 = m_policy.m_tile_end[2]; + temp1 = m_policy.m_tile_end[3]; + const index_type numbl2 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl3 = + (temp0 * temp1 > max_blocks + ? static_cast(max_blocks / numbl2) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id2 = + static_cast(hipBlockIdx_y) % numbl2; + const index_type tile_id3 = + static_cast(hipBlockIdx_y) / numbl2; + const index_type thr_id2 = + static_cast(hipThreadIdx_y) % m_policy.m_tile[2]; + const index_type thr_id3 = + static_cast(hipThreadIdx_y) / m_policy.m_tile[2]; + + temp0 = m_policy.m_tile_end[4]; + temp1 = m_policy.m_tile_end[5]; + const index_type numbl4 = (temp0 <= max_blocks ? temp0 : max_blocks); + const index_type numbl5 = + (temp0 * temp1 > max_blocks + ? static_cast(max_blocks / numbl4) + : (temp1 <= max_blocks ? temp1 : max_blocks)); + + const index_type tile_id4 = + static_cast(hipBlockIdx_z) % numbl4; + const index_type tile_id5 = + static_cast(hipBlockIdx_z) / numbl4; + const index_type thr_id4 = + static_cast(hipThreadIdx_z) % m_policy.m_tile[4]; + const index_type thr_id5 = + static_cast(hipThreadIdx_z) / m_policy.m_tile[4]; + + for (index_type n = tile_id5; n < m_policy.m_tile_end[5]; n += numbl5) { + const index_type offset_5 = + n * m_policy.m_tile[5] + thr_id5 + + static_cast(m_policy.m_lower[5]); + if (offset_5 < m_policy.m_upper[5] && thr_id5 < m_policy.m_tile[5]) { + for (index_type m = tile_id4; m < m_policy.m_tile_end[4]; + m += numbl4) { + const index_type offset_4 = + m * m_policy.m_tile[4] + thr_id4 + + static_cast(m_policy.m_lower[4]); + if (offset_4 < m_policy.m_upper[4] && + thr_id4 < m_policy.m_tile[4]) { + for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; + l += numbl3) { + const index_type offset_3 = + l * m_policy.m_tile[3] + thr_id3 + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + thr_id3 < m_policy.m_tile[3]) { + for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; + k += numbl2) { + const index_type offset_2 = + k * m_policy.m_tile[2] + thr_id2 + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + thr_id2 < m_policy.m_tile[2]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type i = tile_id0; + i < m_policy.m_tile_end[0]; i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && + thr_id0 < m_policy.m_tile[0]) { + m_func(Tag(), offset_0, offset_1, offset_2, + offset_3, offset_4, offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } + // LR + else { + index_type temp0 = m_policy.m_tile_end[0]; + index_type temp1 = m_policy.m_tile_end[1]; + const index_type numbl1 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl0 = + (temp0 * temp1 > max_blocks + ? static_cast(max_blocks / numbl1) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id0 = + static_cast(hipBlockIdx_x) / numbl1; + const index_type tile_id1 = + static_cast(hipBlockIdx_x) % numbl1; + const index_type thr_id0 = + static_cast(hipThreadIdx_x) / m_policy.m_tile[1]; + const index_type thr_id1 = + static_cast(hipThreadIdx_x) % m_policy.m_tile[1]; + + temp0 = m_policy.m_tile_end[2]; + temp1 = m_policy.m_tile_end[3]; + const index_type numbl3 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl2 = + (temp0 * temp1 > max_blocks + ? static_cast(max_blocks / numbl3) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id2 = + static_cast(hipBlockIdx_y) / numbl3; + const index_type tile_id3 = + static_cast(hipBlockIdx_y) % numbl3; + const index_type thr_id2 = + static_cast(hipThreadIdx_y) / m_policy.m_tile[3]; + const index_type thr_id3 = + static_cast(hipThreadIdx_y) % m_policy.m_tile[3]; + + temp0 = m_policy.m_tile_end[4]; + temp1 = m_policy.m_tile_end[5]; + const index_type numbl5 = (temp1 <= max_blocks ? temp1 : max_blocks); + const index_type numbl4 = + (temp0 * temp1 > max_blocks + ? static_cast(max_blocks / numbl5) + : (temp0 <= max_blocks ? temp0 : max_blocks)); + + const index_type tile_id4 = + static_cast(hipBlockIdx_z) / numbl5; + const index_type tile_id5 = + static_cast(hipBlockIdx_z) % numbl5; + const index_type thr_id4 = + static_cast(hipThreadIdx_z) / m_policy.m_tile[5]; + const index_type thr_id5 = + static_cast(hipThreadIdx_z) % m_policy.m_tile[5]; + + for (index_type i = tile_id0; i < m_policy.m_tile_end[0]; i += numbl0) { + const index_type offset_0 = + i * m_policy.m_tile[0] + thr_id0 + + static_cast(m_policy.m_lower[0]); + if (offset_0 < m_policy.m_upper[0] && thr_id0 < m_policy.m_tile[0]) { + for (index_type j = tile_id1; j < m_policy.m_tile_end[1]; + j += numbl1) { + const index_type offset_1 = + j * m_policy.m_tile[1] + thr_id1 + + static_cast(m_policy.m_lower[1]); + if (offset_1 < m_policy.m_upper[1] && + thr_id1 < m_policy.m_tile[1]) { + for (index_type k = tile_id2; k < m_policy.m_tile_end[2]; + k += numbl2) { + const index_type offset_2 = + k * m_policy.m_tile[2] + thr_id2 + + static_cast(m_policy.m_lower[2]); + if (offset_2 < m_policy.m_upper[2] && + thr_id2 < m_policy.m_tile[2]) { + for (index_type l = tile_id3; l < m_policy.m_tile_end[3]; + l += numbl3) { + const index_type offset_3 = + l * m_policy.m_tile[3] + thr_id3 + + static_cast(m_policy.m_lower[3]); + if (offset_3 < m_policy.m_upper[3] && + thr_id3 < m_policy.m_tile[3]) { + for (index_type m = tile_id4; m < m_policy.m_tile_end[4]; + m += numbl4) { + const index_type offset_4 = + m * m_policy.m_tile[4] + thr_id4 + + static_cast(m_policy.m_lower[4]); + if (offset_4 < m_policy.m_upper[4] && + thr_id4 < m_policy.m_tile[4]) { + for (index_type n = tile_id5; + n < m_policy.m_tile_end[5]; n += numbl5) { + const index_type offset_5 = + n * m_policy.m_tile[5] + thr_id5 + + static_cast(m_policy.m_lower[5]); + if (offset_5 < m_policy.m_upper[5] && + thr_id5 < m_policy.m_tile[5]) { + m_func(Tag(), offset_0, offset_1, offset_2, + offset_3, offset_4, offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; +}; + +// ---------------------------------------------------------------------------------- + +namespace Reduce { + +template +using is_void = std::is_same; + +template +struct is_array_type : std::false_type { + using value_type = T; +}; + +template +struct is_array_type : std::true_type { + using value_type = T; +}; + +template +struct is_array_type : std::true_type { + using value_type = T; +}; + +// ------------------------------------------------------------------ // +template +struct DeviceIterateTile; + +// ParallelReduce iteration pattern +// Scalar reductions + +// num_blocks = min( num_tiles, max_num_blocks ); //i.e. determined by number of +// tiles and reduction algorithm constraints extract n-dim tile offsets (i.e. +// tile's global starting mulit-index) from the tileid = blockid using tile +// dimensions local indices within a tile extracted from (index_type)threadIdx_x +// using tile dims, constrained by blocksize combine tile and local id info for +// multi-dim global ids + +// Pattern: +// Each block+thread is responsible for a tile+local_id combo (additional when +// striding by num_blocks) +// 1. create offset arrays +// 2. loop over number of tiles, striding by griddim (equal to num tiles, or max +// num blocks) +// 3. temps set for tile_idx and thrd_idx, which will be modified +// 4. if LL vs LR: +// determine tile starting point offsets (multidim) +// determine local index offsets (multidim) +// concatentate tile offset + local offset for global multi-dim index +// if offset withinin range bounds AND local offset within tile bounds, call +// functor + +// ValueType = T +// Rank 2 +// Specializations for void tag type +template +struct DeviceIterateTile< + 2, PolicyType, Functor, void, ValueType, + typename std::enable_if::value>::type> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& rp_, const Functor& f_, + ValueType& v_) + : m_policy(rp_), m_func(f_), m_v(v_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + // Deduce this blocks tile_id + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_v); + } + } + } + } + + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + ValueType& m_v; +}; + +// Specializations for tag type +template +struct DeviceIterateTile< + 2, PolicyType, Functor, Tag, ValueType, + typename std::enable_if::value && + !is_void::value>::type> { + using index_type = typename PolicyType::index_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& rp_, const Functor& f_, ValueType& v_) + : m_policy(rp_), m_func(f_), m_v(v_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = + (thrd_idx % + m_policy.m_tile[i]); // Move this to first computation, + // add to m_offset right away + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + ValueType& m_v; +}; + +// Rank 3 +// Specializations for void tag type +template +struct DeviceIterateTile< + 3, PolicyType, Functor, void, ValueType, + typename std::enable_if::value>::type> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + ValueType& v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = + (thrd_idx % + m_policy.m_tile[i]); // Move this to first computation, + // add to m_offset right away + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + ValueType& m_v; +}; + +// Specializations for void tag type +template +struct DeviceIterateTile< + 3, PolicyType, Functor, Tag, ValueType, + typename std::enable_if::value && + !is_void::value>::type> { + using index_type = typename PolicyType::index_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_, ValueType& v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = + (thrd_idx % + m_policy.m_tile[i]); // Move this to first computation, + // add to m_offset right away + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + ValueType& m_v; +}; + +// Rank 4 +// Specializations for void tag type +template +struct DeviceIterateTile< + 4, PolicyType, Functor, void, ValueType, + typename std::enable_if::value>::type> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + ValueType& v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + ValueType& m_v; +}; + +// Specializations for void tag type +template +struct DeviceIterateTile< + 4, PolicyType, Functor, Tag, ValueType, + typename std::enable_if::value && + !is_void::value>::type> { + using index_type = typename PolicyType::index_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_, ValueType& v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + ValueType& m_v; +}; + +// Rank 5 +// Specializations for void tag type +template +struct DeviceIterateTile< + 5, PolicyType, Functor, void, ValueType, + typename std::enable_if::value>::type> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + ValueType& v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + ValueType& m_v; +}; + +// Specializations for tag type +template +struct DeviceIterateTile< + 5, PolicyType, Functor, Tag, ValueType, + typename std::enable_if::value && + !is_void::value>::type> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + ValueType& v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + ValueType& m_v; +}; + +// Rank 6 +// Specializations for void tag type +template +struct DeviceIterateTile< + 6, PolicyType, Functor, void, ValueType, + typename std::enable_if::value>::type> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + ValueType& v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_offset[5], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_offset[5], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + ValueType& m_v; +}; + +// Specializations for tag type +template +struct DeviceIterateTile< + 6, PolicyType, Functor, Tag, ValueType, + typename std::enable_if::value && + !is_void::value>::type> { + using index_type = typename PolicyType::index_type; + + __device__ DeviceIterateTile(const PolicyType& rp_, const Functor& f_, + ValueType& v_) + : m_policy(rp_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_offset[5], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_offset[5], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + ValueType& m_v; +}; + +// ValueType = T[], T* +// Rank 2 +// Specializations for void tag type +template +struct DeviceIterateTile< + 2, PolicyType, Functor, void, ValueType, + typename std::enable_if::value>::type> { + using index_type = typename PolicyType::index_type; + using value_type = typename is_array_type::value_type; + + __device__ DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + value_type* v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = + (thrd_idx % + m_policy.m_tile[i]); // Move this to first computation, + // add to m_offset right away + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + value_type* m_v; +}; + +// Specializations for tag type +template +struct DeviceIterateTile< + 2, PolicyType, Functor, Tag, ValueType, + typename std::enable_if::value && + !is_void::value>::type> { + using index_type = typename PolicyType::index_type; + using value_type = typename is_array_type::value_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& rp_, const Functor& f_, value_type* v_) + : m_policy(rp_), m_func(f_), m_v(v_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_v); + } + } + } // end for loop over num_tiles - product of tiles in each direction + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + value_type* m_v; +}; + +// Rank 3 +// Specializations for void tag type +template +struct DeviceIterateTile< + 3, PolicyType, Functor, void, ValueType, + typename std::enable_if::value>::type> { + using index_type = typename PolicyType::index_type; + using value_type = typename is_array_type::value_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + value_type* v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = + (thrd_idx % + m_policy.m_tile[i]); // Move this to first computation, + // add to m_offset right away + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = + (thrd_idx % + m_policy.m_tile[i]); // Move this to first computation, + // add to m_offset right away + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + value_type* m_v; +}; + +// Specializations for void tag type +template +struct DeviceIterateTile< + 3, PolicyType, Functor, Tag, ValueType, + typename std::enable_if::value && + !is_void::value>::type> { + using index_type = typename PolicyType::index_type; + using value_type = typename is_array_type::value_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + value_type* v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + value_type* m_v; +}; + +// Rank 4 +// Specializations for void tag type +template +struct DeviceIterateTile< + 4, PolicyType, Functor, void, ValueType, + typename std::enable_if::value>::type> { + using index_type = typename PolicyType::index_type; + using value_type = typename is_array_type::value_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + value_type* v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + value_type* m_v; +}; + +// Specializations for void tag type +template +struct DeviceIterateTile< + 4, PolicyType, Functor, Tag, ValueType, + typename std::enable_if::value && + !is_void::value>::type> { + using index_type = typename PolicyType::index_type; + using value_type = typename is_array_type::value_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + value_type* v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with (index_type)threadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + value_type* m_v; +}; + +// Rank 5 +// Specializations for void tag type +template +struct DeviceIterateTile< + 5, PolicyType, Functor, void, ValueType, + typename std::enable_if::value>::type> { + using index_type = typename PolicyType::index_type; + using value_type = typename is_array_type::value_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + value_type* v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + value_type* m_v; +}; + +// Specializations for tag type +template +struct DeviceIterateTile< + 5, PolicyType, Functor, Tag, ValueType, + typename std::enable_if::value && + !is_void::value>::type> { + using index_type = typename PolicyType::index_type; + using value_type = typename is_array_type::value_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + value_type* v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + value_type* m_v; +}; + +// Rank 6 +// Specializations for void tag type +template +struct DeviceIterateTile< + 6, PolicyType, Functor, void, ValueType, + typename std::enable_if::value>::type> { + using index_type = typename PolicyType::index_type; + using value_type = typename is_array_type::value_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + value_type* v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_offset[5], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_offset[5], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + value_type* m_v; +}; + +// Specializations for tag type +template +struct DeviceIterateTile< + 6, PolicyType, Functor, Tag, ValueType, + typename std::enable_if::value && + !is_void::value>::type> { + using index_type = typename PolicyType::index_type; + using value_type = typename is_array_type::value_type; + + KOKKOS_INLINE_FUNCTION + DeviceIterateTile(const PolicyType& policy_, const Functor& f_, + value_type* v_) + : m_policy(policy_), m_func(f_), m_v(v_) {} + + static constexpr index_type max_blocks = 65535; + + KOKKOS_INLINE_FUNCTION + void exec_range() const { + if (static_cast(hipBlockIdx_x) < m_policy.m_num_tiles && + static_cast(hipThreadIdx_y) < m_policy.m_prod_tile_dims) { + index_type m_offset[PolicyType::rank]; // tile starting global id offset + index_type + m_local_offset[PolicyType::rank]; // tile starting global id offset + + for (index_type tileidx = static_cast(hipBlockIdx_x); + tileidx < m_policy.m_num_tiles; tileidx += hipGridDim_x) { + index_type tile_idx = + tileidx; // temp because tile_idx will be modified while + // determining tile starting point offsets + index_type thrd_idx = static_cast(hipThreadIdx_y); + bool in_bounds = true; + + // LL + if (PolicyType::inner_direction == PolicyType::Left) { + for (int i = 0; i < PolicyType::rank; ++i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_offset[5], m_v); + } + } + // LR + else { + for (int i = PolicyType::rank - 1; i >= 0; --i) { + m_offset[i] = + (tile_idx % m_policy.m_tile_end[i]) * m_policy.m_tile[i] + + m_policy.m_lower[i]; + tile_idx /= m_policy.m_tile_end[i]; + + // tile-local indices identified with hipThreadIdx_y + m_local_offset[i] = (thrd_idx % m_policy.m_tile[i]); + thrd_idx /= m_policy.m_tile[i]; + + m_offset[i] += m_local_offset[i]; + if (!(m_offset[i] < m_policy.m_upper[i] && + m_local_offset[i] < m_policy.m_tile[i])) { + in_bounds &= false; + } + } + if (in_bounds) { + m_func(Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], + m_offset[4], m_offset[5], m_v); + } + } + } + } + } // end exec_range + + private: + const PolicyType& m_policy; + const Functor& m_func; + value_type* m_v; +}; + +} // namespace Reduce +} // namespace Impl +} // namespace Kokkos +#endif +#endif diff --git a/core/src/HIP/Kokkos_HIP_Abort.hpp b/core/src/HIP/Kokkos_HIP_Abort.hpp new file mode 100644 index 00000000000..1eaae383024 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Abort.hpp @@ -0,0 +1,68 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIP_ABORT_HPP +#define KOKKOS_HIP_ABORT_HPP + +#include +#if defined(KOKKOS_ENABLE_HIP) + +#include + +namespace Kokkos { +namespace Impl { + +__device__ inline void hip_abort(char const *msg) { + printf("%s", msg); + // FIXME_HIP both abort and the __assertfail system call are currently + // implemented with __builtin_trap which causes the program to exit abnormally + // without printing the error message. + // abort(); +} + +} // namespace Impl +} // namespace Kokkos + +#endif +#endif diff --git a/core/src/HIP/Kokkos_HIP_Atomic.hpp b/core/src/HIP/Kokkos_HIP_Atomic.hpp new file mode 100644 index 00000000000..c09e09f5004 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Atomic.hpp @@ -0,0 +1,576 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIP_ATOMIC_HPP +#define KOKKOS_HIP_ATOMIC_HPP + +#ifdef KOKKOS_ENABLE_HIP_ATOMICS + +namespace Kokkos { +// HIP can do: +// Types int/unsigned int +// variants: +// atomic_exchange/compare_exchange/fetch_add/fetch_sub/fetch_max/fetch_min/fetch_and/fetch_or/fetch_xor/fetch_inc/fetch_dec + +// atomic_exchange ------------------------------------------------------------- + +__inline__ __device__ int atomic_exchange(volatile int *const dest, + const int val) { + return atomicExch(const_cast(dest), val); +} + +__inline__ __device__ unsigned int atomic_exchange( + volatile unsigned int *const dest, const unsigned int val) { + return atomicExch(const_cast(dest), val); +} + +__inline__ __device__ unsigned long long int atomic_exchange( + volatile unsigned long long int *const dest, + const unsigned long long int val) { + return atomicExch(const_cast(dest), val); +} + +__inline__ __device__ float atomic_exchange(volatile float *const dest, + const float val) { + return atomicExch(const_cast(dest), val); +} + +template +__inline__ __device__ T atomic_exchange( + volatile T *const dest, + typename std::enable_if::type val) { + int tmp = atomicExch(reinterpret_cast(const_cast(dest)), + *reinterpret_cast(const_cast(&val))); + return reinterpret_cast(tmp); +} + +template +__inline__ __device__ T atomic_exchange( + volatile T *const dest, + typename std::enable_if::type val) { + typedef unsigned long long int type; + + type tmp = atomicExch(reinterpret_cast(const_cast(dest)), + *reinterpret_cast(const_cast(&val))); + return reinterpret_cast(tmp); +} + +template +__inline__ __device__ T +atomic_exchange(volatile T *const dest, + typename std::enable_if::type &val) { + // FIXME_HIP + Kokkos::abort("atomic_exchange not implemented for large types.\n"); + T return_val; + int done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active != done_active) { + if (!done) { + // if (Impl::lock_address_hip_space((void*)dest)) + { + return_val = *dest; + *dest = val; + // Impl::unlock_address_hip_space((void*)dest); + done = 1; + } + } + done_active = __ballot(done); + } + return return_val; +} + +// atomic_assign --------------------------------------------------------------- + +template +__inline__ __device__ void atomic_assign( + volatile T *const dest, + typename std::enable_if::type val) { + atomicExch(reinterpret_cast(const_cast(dest)), + *reinterpret_cast(const_cast(&val))); +} + +template +__inline__ __device__ void atomic_assign( + volatile T *const dest, + typename std::enable_if::type val) { + typedef unsigned long long int type; + atomicExch(reinterpret_cast(const_cast(dest)), + *reinterpret_cast(const_cast(&val))); +} + +template +__inline__ __device__ void atomic_assign( + volatile T *const dest, + typename std::enable_if::type val) { + atomic_exchange(dest, val); +} + +// atomic_compare_exchange ----------------------------------------------------- + +inline __device__ int atomic_compare_exchange(volatile int *dest, int compare, + const int &val) { + return atomicCAS(const_cast(dest), compare, val); +} + +inline __device__ unsigned int atomic_compare_exchange( + volatile unsigned int *dest, unsigned int compare, + const unsigned int &val) { + return atomicCAS(const_cast(dest), compare, val); +} + +inline __device__ unsigned long long int atomic_compare_exchange( + volatile unsigned long long int *dest, unsigned long long int compare, + const unsigned long long int &val) { + return atomicCAS(const_cast(dest), compare, val); +} + +template +__inline__ __device__ T atomic_compare_exchange( + volatile T *dest, T compare, + typename std::enable_if::type val) { + // FIXME_HIP UB + union U { + int i; + T f; + __inline__ __device__ U() {} + } idest, icompare, ival; + icompare.f = compare; + ival.f = val; + idest.i = atomicCAS(reinterpret_cast(const_cast(dest)), + icompare.i, ival.i); + return idest.f; +} + +template +__inline__ __device__ T atomic_compare_exchange( + volatile T *dest, T compare, + typename std::enable_if::type val) { + // FIXME_HIP UB + union U { + unsigned long long int i; + T f; + __inline__ __device__ U() {} + } idest, icompare, ival; + icompare.f = compare; + ival.f = val; + idest.i = atomicCAS( + reinterpret_cast(const_cast(dest)), + icompare.i, ival.i); + return idest.f; +} + +template +__inline__ __device__ T atomic_compare_exchange( + volatile T *const dest, const T &compare, + typename std::enable_if::type &val) { + // FIXME_HIP + Kokkos::abort("atomic_compare_exchange not implemented for large types.\n"); + T return_val; + int done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active != done_active) { + if (!done) { + // if (Impl::lock_address_hip_space((void*)dest)) + { + return_val = *dest; + if (return_val == compare) *dest = val; + // Impl::unlock_address_hip_space((void*)dest); + done = 1; + } + } + done_active = __ballot(done); + } + return return_val; +} + +// atomic_fetch_add ------------------------------------------------------------ + +inline __device__ int atomic_fetch_add(volatile int *dest, const int &val) { + return atomicAdd(const_cast(dest), val); +} + +inline __device__ unsigned int atomic_fetch_add(volatile unsigned int *dest, + const unsigned int &val) { + return atomicAdd(const_cast(dest), val); +} + +inline __device__ unsigned long long atomic_fetch_add( + volatile unsigned long long *dest, const unsigned long long &val) { + return atomicAdd(const_cast(dest), val); +} + +inline __device__ float atomic_fetch_add(volatile float *dest, + const float &val) { + return atomicAdd(const_cast(dest), val); +} + +template +inline __device__ T atomic_fetch_add( + volatile T *const dest, + typename std::enable_if::type val) { + // FIXME_HIP UB + union U { + int i; + T t; + __inline__ __device__ U() {} + } assume, oldval, newval; + + oldval.t = *dest; + + do { + assume.i = oldval.i; + newval.t = assume.t + val; + oldval.i = atomicCAS(reinterpret_cast(const_cast(dest)), + assume.i, newval.i); + } while (assume.i != oldval.i); + + return oldval.t; +} + +template +inline __device__ T atomic_fetch_add( + volatile T *const dest, + typename std::enable_if::type + val) { + // FIXME_HIP UB + union U { + unsigned long long i; + T t; + __inline__ __device__ U() {} + } assume, oldval, newval; + + oldval.t = *dest; + + do { + assume.i = oldval.i; + newval.t = assume.t + val; + oldval.i = atomic_compare_exchange( + reinterpret_cast(dest), assume.i, + newval.i); + } while (assume.i != oldval.i); + + return oldval.t; +} + +__inline__ __device__ char atomic_fetch_add(volatile char *dest, + const char &val) { + unsigned int oldval, newval, assume; + oldval = *reinterpret_cast(&dest); + + do { + assume = oldval; + newval = assume & 0x7fffff00 + ((assume & 0xff) + val) & 0xff; + oldval = + atomicCAS(reinterpret_cast(const_cast(dest)), + assume, newval); + } while (assume != oldval); + + return oldval; +} + +__inline__ __device__ short atomic_fetch_add(volatile short *dest, + const short &val) { + unsigned int oldval, newval, assume; + oldval = *reinterpret_cast(&dest); + + do { + assume = oldval; + newval = assume & 0x7fff0000 + ((assume & 0xffff) + val) & 0xffff; + oldval = + atomicCAS(reinterpret_cast(const_cast(dest)), + assume, newval); + } while (assume != oldval); + + return oldval; +} + +__inline__ __device__ long long atomic_fetch_add(volatile long long *dest, + const long long &val) { + return atomicAdd( + reinterpret_cast(const_cast(dest)), + val); +} + +template +__inline__ __device__ T +atomic_fetch_add(volatile T *dest, + typename std::enable_if::type val) { + // FIXME_HIP + Kokkos::abort("atomic_fetch_add not implemented for large types.\n"); + T return_val; + int done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active != done_active) { + if (!done) { + // if(Kokkos::Impl::lock_address_hip_space((void *)dest)) + { + return_val = *dest; + *dest = return_val + val; + // Kokkos::Impl::unlock_address_hip_space((void *)dest); + done = 1; + } + } + done_active = __ballot(done); + } + return return_val; +} + +// atmic_fetch_sub ------------------------------------------------------------- + +__inline__ __device__ int atomic_fetch_sub(volatile int *dest, int const &val) { + return atomicSub(const_cast(dest), val); +} + +__inline__ __device__ unsigned int atomic_fetch_sub(volatile unsigned int *dest, + unsigned int const &val) { + return atomicSub(const_cast(dest), val); +} + +__inline__ __device__ unsigned long long atomic_fetch_sub( + unsigned long long *dest, int64_t const &val) { + return atomicAdd(reinterpret_cast(dest), + -reinterpret_cast(val)); +} + +__inline__ __device__ char atomic_fetch_sub(volatile char *dest, + const char &val) { + unsigned int oldval, newval, assume; + oldval = *reinterpret_cast(dest); + + do { + assume = oldval; + newval = assume & 0x7fffff00 + ((assume & 0xff) - val) & 0xff; + oldval = + atomicCAS(reinterpret_cast(const_cast(dest)), + assume, newval); + } while (assume != oldval); + + return oldval; +} + +__inline__ __device__ short atomic_fetch_sub(volatile short *dest, + const short &val) { + unsigned int oldval, newval, assume; + oldval = *reinterpret_cast(dest); + + do { + assume = oldval; + newval = assume & 0x7fff0000 + ((assume & 0xffff) - val) & 0xffff; + oldval = + atomicCAS(reinterpret_cast(const_cast(dest)), + assume, newval); + } while (assume != oldval); + + return oldval; +} + +__inline__ __device__ long long atomic_fetch_sub(volatile long long *dest, + const long long &val) { + return static_cast(atomicAdd( + reinterpret_cast(const_cast(dest)), + -reinterpret_cast(val))); +} + +template +__inline__ __device__ T atomic_fetch_sub( + volatile T *dest, + typename std::enable_if::type val) { + // FIXME_HIP UB + union U { + int i; + T t; + __inline__ __device__ U() {} + } assume, oldval, newval; + + oldval.t = *dest; + + do { + assume.i = oldval.i; + newval.t = assume.t - val; + oldval.i = atomic_compare_exchange(reinterpret_cast(dest), + assume.i, newval.i); + } while (assume.i != oldval.i); + + return oldval.t; +} + +template +inline __device__ T atomic_fetch_sub( + volatile T *const dest, + typename std::enable_if::type + val) { + // FIXME_HIP UB + union U { + unsigned long long i; + T t; + __inline__ __device__ U() {} + } assume, oldval, newval; + + oldval.t = *dest; + + do { + assume.i = oldval.i; + newval.t = assume.t - val; + oldval.i = atomic_compare_exchange( + reinterpret_cast(dest), assume.i, + newval.i); + } while (assume.i != oldval.i); + + return oldval.t; +} + +template +__inline__ __device__ T atomic_fetch_sub( + volatile T *dest, + typename std::enable_if::type val) { + unsigned int oldval, newval, assume; + oldval = *reinterpret_cast(dest); + + do { + assume = oldval; + newval = assume & 0x7fffff00 + ((assume & 0xff) - val) & 0xff; + oldval = atomicCAS(reinterpret_cast(dest), assume, newval); + } while (assume != oldval); + + return reinterpret_cast(oldval) & 0xff; +} + +template +__inline__ __device__ T atomic_fetch_sub( + volatile T *dest, + typename std::enable_if::type val) { + unsigned int oldval, newval, assume; + oldval = *reinterpret_cast(dest); + + do { + assume = oldval; + newval = assume & 0x7fff0000 + ((assume & 0xffff) - val) & 0xffff; + oldval = atomicCAS(reinterpret_cast(dest), assume, newval); + } while (assume != oldval); + + return reinterpret_cast(oldval) & 0xffff; +} + +template +__inline__ __device__ T +atomic_fetch_sub(volatile T *const dest, + typename std::enable_if::type &val) { + // FIXME_HIP + Kokkos::abort("atomic_fetch_sub not implemented for large types.\n"); + T return_val; + int done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active != done_active) { + if (!done) { + /*if (Impl::lock_address_hip_space((void*)dest)) */ + { + return_val = *dest; + *dest = return_val - val; + // Impl::unlock_address_hip_space((void*)dest); + done = 1; + } + } + done_active = __ballot(done); + } + return return_val; +} + +// atomic_fetch_or ------------------------------------------------------------- + +__inline__ __device__ int atomic_fetch_or(volatile int *const dest, + int const val) { + return atomicOr(const_cast(dest), val); +} + +__inline__ __device__ unsigned int atomic_fetch_or( + volatile unsigned int *const dest, unsigned int const val) { + return atomicOr(const_cast(dest), val); +} + +__inline__ __device__ unsigned long long int atomic_fetch_or( + volatile unsigned long long int *const dest, + unsigned long long int const val) { + return atomicOr(const_cast(dest), val); +} + +// atomic_fetch_and ------------------------------------------------------------ + +__inline__ __device__ int atomic_fetch_and(volatile int *const dest, + int const val) { + return atomicAnd(const_cast(dest), val); +} + +__inline__ __device__ unsigned int atomic_fetch_and( + volatile unsigned int *const dest, unsigned int const val) { + return atomicAnd(const_cast(dest), val); +} + +__inline__ __device__ unsigned long long int atomic_fetch_and( + volatile unsigned long long int *const dest, + unsigned long long int const val) { + return atomicAnd(const_cast(dest), val); +} +} // namespace Kokkos +#endif + +#endif diff --git a/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp b/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp new file mode 100644 index 00000000000..8799d359ff0 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_BlockSize_Deduction.hpp @@ -0,0 +1,339 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIP_BLOCKSIZE_DEDUCTION_HPP +#define KOKKOS_HIP_BLOCKSIZE_DEDUCTION_HPP + +#include + +#if defined(__HIPCC__) + +#include +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { +template +struct HIPGetMaxBlockSize; + +template +int hip_get_max_block_size(typename DriverType::functor_type const &f, + size_t const vector_length, + size_t const shmem_extra_block, + size_t const shmem_extra_thread) { + return HIPGetMaxBlockSize::get_block_size( + f, vector_length, shmem_extra_block, shmem_extra_thread); +} + +template +int hip_get_max_block_size(const HIPInternal * /*hip_instance*/, + const hipFuncAttributes &attr, + const FunctorType & /*f*/, + const size_t /*vector_length*/, + const size_t /*shmem_block*/, + const size_t /*shmem_thread*/) { + // FIXME_HIP find a better algorithm. Be aware that + // maxThreadsPerMultiProcessor, regsPerBlock, and l2CacheSize are bugged and + // always return zero + // https://github.com/ROCm-Developer-Tools/HIP/blob/6c5fa32815650cc20a4f783d09b013610348a4d5/include/hip/hcc_detail/hip_runtime_api.h#L438-L440 + // and we don't have access to the same information than we do for CUDA + + int const max_threads_per_block_mi60 = 1024; + int const max_threads_per_block = LaunchBounds::maxTperB == 0 + ? max_threads_per_block_mi60 + : LaunchBounds::maxTperB; + return std::min(attr.maxThreadsPerBlock, max_threads_per_block); +} + +template +struct HIPGetMaxBlockSize, true> { + static int get_block_size(typename DriverType::functor_type const &f, + size_t const vector_length, + size_t const shmem_extra_block, + size_t const shmem_extra_thread) { + unsigned int numBlocks = 0; + int blockSize = 1024; + int sharedmem = + shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + + ::Kokkos::Impl::FunctorTeamShmemSize< + typename DriverType::functor_type>::value(f, blockSize / + vector_length); + hipOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, hip_parallel_launch_constant_memory, blockSize, + sharedmem); + + if (numBlocks > 0) return blockSize; + while (blockSize > HIPTraits::WarpSize && numBlocks == 0) { + blockSize /= 2; + sharedmem = + shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + + ::Kokkos::Impl::FunctorTeamShmemSize< + typename DriverType::functor_type>::value(f, blockSize / + vector_length); + + hipOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, hip_parallel_launch_constant_memory, + blockSize, sharedmem); + } + int blockSizeUpperBound = blockSize * 2; + while (blockSize < blockSizeUpperBound && numBlocks > 0) { + blockSize += HIPTraits::WarpSize; + sharedmem = + shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + + ::Kokkos::Impl::FunctorTeamShmemSize< + typename DriverType::functor_type>::value(f, blockSize / + vector_length); + + hipOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, hip_parallel_launch_constant_memory, + blockSize, sharedmem); + } + return blockSize - HIPTraits::WarpSize; + } +}; + +template +struct HIPGetOptBlockSize; + +template +int hip_get_opt_block_size(typename DriverType::functor_type const &f, + size_t const vector_length, + size_t const shmem_extra_block, + size_t const shmem_extra_thread) { + return HIPGetOptBlockSize< + DriverType, LaunchBounds, + (HIPTraits::ConstantMemoryUseThreshold < + sizeof(DriverType))>::get_block_size(f, vector_length, shmem_extra_block, + shmem_extra_thread); +} + +template +int hip_get_opt_block_size(HIPInternal const * /*hip_instance*/, + hipFuncAttributes const &attr, + FunctorType const & /*f*/, + size_t const /*vector_length*/, + size_t const /*shmem_block*/, + size_t const /*shmem_thread*/) { + // FIXME_HIP find a better algorithm. Be aware that + // maxThreadsPerMultiProcessor, regsPerBlock, and l2CacheSize are bugged and + // always return zero + // https://github.com/ROCm-Developer-Tools/HIP/blob/6c5fa32815650cc20a4f783d09b013610348a4d5/include/hip/hcc_detail/hip_runtime_api.h#L438-L440 + // and we don't have access to the same information than we do for CUDA + + int const max_threads_per_block_mi60 = 1024; + int const max_threads_per_block = LaunchBounds::maxTperB == 0 + ? max_threads_per_block_mi60 + : LaunchBounds::maxTperB; + return std::min(attr.maxThreadsPerBlock, max_threads_per_block); +} + +// FIXME_HIP the code is identical to the false struct except for +// hip_parallel_launch_constant_memory +template +struct HIPGetOptBlockSize, true> { + static int get_block_size(typename DriverType::functor_type const &f, + size_t const vector_length, + size_t const shmem_extra_block, + size_t const shmem_extra_thread) { + int blockSize = HIPTraits::WarpSize / 2; + int numBlocks; + int sharedmem; + int maxOccupancy = 0; + int bestBlockSize = 0; + + while (blockSize < 1024) { + blockSize *= 2; + + // calculate the occupancy with that optBlockSize and check whether its + // larger than the largest one found so far + sharedmem = + shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + + ::Kokkos::Impl::FunctorTeamShmemSize< + typename DriverType::functor_type>::value(f, blockSize / + vector_length); + hipOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, hip_parallel_launch_constant_memory, + blockSize, sharedmem); + if (maxOccupancy < numBlocks * blockSize) { + maxOccupancy = numBlocks * blockSize; + bestBlockSize = blockSize; + } + } + return bestBlockSize; + } +}; + +template +struct HIPGetOptBlockSize, false> { + static int get_block_size(const typename DriverType::functor_type &f, + const size_t vector_length, + const size_t shmem_extra_block, + const size_t shmem_extra_thread) { + int blockSize = HIPTraits::WarpSize / 2; + int numBlocks; + int sharedmem; + int maxOccupancy = 0; + int bestBlockSize = 0; + + while (blockSize < 1024) { + blockSize *= 2; + sharedmem = + shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + + ::Kokkos::Impl::FunctorTeamShmemSize< + typename DriverType::functor_type>::value(f, blockSize / + vector_length); + + hipOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, hip_parallel_launch_local_memory, blockSize, + sharedmem); + + if (maxOccupancy < numBlocks * blockSize) { + maxOccupancy = numBlocks * blockSize; + bestBlockSize = blockSize; + } + } + return bestBlockSize; + } +}; + +// FIXME_HIP the code is identical to the false struct except for +// hip_parallel_launch_constant_memory +template +struct HIPGetOptBlockSize< + DriverType, Kokkos::LaunchBounds, + true> { + static int get_block_size(const typename DriverType::functor_type &f, + const size_t vector_length, + const size_t shmem_extra_block, + const size_t shmem_extra_thread) { + int blockSize = HIPTraits::WarpSize / 2; + int numBlocks; + int sharedmem; + int maxOccupancy = 0; + int bestBlockSize = 0; + int max_threads_per_block = + std::min(MaxThreadsPerBlock, + hip_internal_maximum_warp_count() * HIPTraits::WarpSize); + + while (blockSize < max_threads_per_block) { + blockSize *= 2; + + // calculate the occupancy with that optBlockSize and check whether its + // larger than the largest one found so far + sharedmem = + shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + + ::Kokkos::Impl::FunctorTeamShmemSize< + typename DriverType::functor_type>::value(f, blockSize / + vector_length); + hipOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + hip_parallel_launch_constant_memory, + blockSize, sharedmem); + if (numBlocks >= static_cast(MinBlocksPerSM) && + blockSize <= static_cast(MaxThreadsPerBlock)) { + if (maxOccupancy < numBlocks * blockSize) { + maxOccupancy = numBlocks * blockSize; + bestBlockSize = blockSize; + } + } + } + if (maxOccupancy > 0) return bestBlockSize; + return -1; + } +}; + +template +struct HIPGetOptBlockSize< + DriverType, Kokkos::LaunchBounds, + false> { + static int get_block_size(const typename DriverType::functor_type &f, + const size_t vector_length, + const size_t shmem_extra_block, + const size_t shmem_extra_thread) { + int blockSize = HIPTraits::WarpSize / 2; + int numBlocks; + int sharedmem; + int maxOccupancy = 0; + int bestBlockSize = 0; + int max_threads_per_block = + std::min(MaxThreadsPerBlock, + hip_internal_maximum_warp_count() * HIPTraits::WarpSize); + + while (blockSize < max_threads_per_block) { + blockSize *= 2; + sharedmem = + shmem_extra_block + shmem_extra_thread * (blockSize / vector_length) + + ::Kokkos::Impl::FunctorTeamShmemSize< + typename DriverType::functor_type>::value(f, blockSize / + vector_length); + + hipOccupancyMaxActiveBlocksPerMultiprocessor( + &numBlocks, + hip_parallel_launch_local_memory, + blockSize, sharedmem); + if (numBlocks >= int(MinBlocksPerSM) && + blockSize <= int(MaxThreadsPerBlock)) { + if (maxOccupancy < numBlocks * blockSize) { + maxOccupancy = numBlocks * blockSize; + bestBlockSize = blockSize; + } + } + } + if (maxOccupancy > 0) return bestBlockSize; + return -1; + } +}; + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif + +#endif diff --git a/core/unit_test/qthreads/TestQthreads_ViewAPI_b.cpp b/core/src/HIP/Kokkos_HIP_Error.hpp similarity index 50% rename from core/unit_test/qthreads/TestQthreads_ViewAPI_b.cpp rename to core/src/HIP/Kokkos_HIP_Error.hpp index b37937369a6..2abded0e99a 100644 --- a/core/unit_test/qthreads/TestQthreads_ViewAPI_b.cpp +++ b/core/src/HIP/Kokkos_HIP_Error.hpp @@ -42,91 +42,69 @@ //@HEADER */ -#include +#ifndef KOKKOS_HIP_ERROR_HPP +#define KOKKOS_HIP_ERROR_HPP -namespace Test { +#include +#include -TEST_F(qthreads, impl_shared_alloc) { -#if 0 - test_shared_alloc< Kokkos::HostSpace, Kokkos::Qthreads >(); -#endif -} - -TEST_F(qthreads, impl_view_mapping_b) { -#if 0 - test_view_mapping_subview< Kokkos::Qthreads >(); - TestViewMappingAtomic< Kokkos::Qthreads >::run(); -#endif -} +#include -TEST_F(qthreads, view_api) { -#if 0 - TestViewAPI< double, Kokkos::Qthreads >(); -#endif -} +#include -TEST_F(qthreads, view_nested_view) { -#if 0 - ::Test::view_nested_view< Kokkos::Qthreads >(); -#endif -} +namespace Kokkos { +namespace Impl { -TEST_F(qthreads, view_remap) { -#if 0 - enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; +void hip_internal_error_throw(hipError_t e, const char* name, + const char* file = NULL, const int line = 0); - typedef Kokkos::View< double*[N1][N2][N3], - Kokkos::LayoutRight, - Kokkos::Qthreads > output_type; +inline void hip_internal_safe_call(hipError_t e, const char* name, + const char* file = NULL, + const int line = 0) { + if (hipSuccess != e) { + hip_internal_error_throw(e, name, file, line); + } +} - typedef Kokkos::View< int**[N2][N3], - Kokkos::LayoutLeft, - Kokkos::Qthreads > input_type; +} // namespace Impl +} // namespace Kokkos - typedef Kokkos::View< int*[N0][N2][N3], - Kokkos::LayoutLeft, - Kokkos::Qthreads > diff_type; +#define HIP_SAFE_CALL(call) \ + Kokkos::Impl::hip_internal_safe_call(call, #call, __FILE__, __LINE__) - output_type output( "output", N0 ); - input_type input ( "input", N0, N1 ); - diff_type diff ( "diff", N0 ); +namespace Kokkos { +namespace Experimental { - int value = 0; +class HIPRawMemoryAllocationFailure : public RawMemoryAllocationFailure { + private: + hipError_t m_error_code = hipSuccess; - for ( size_t i3 = 0; i3 < N3; ++i3 ) - for ( size_t i2 = 0; i2 < N2; ++i2 ) - for ( size_t i1 = 0; i1 < N1; ++i1 ) - for ( size_t i0 = 0; i0 < N0; ++i0 ) - { - input( i0, i1, i2, i3 ) = ++value; + static FailureMode get_failure_mode(hipError_t error_code) { + switch (error_code) { + case hipErrorMemoryAllocation: return FailureMode::OutOfMemoryError; + case hipErrorInvalidValue: return FailureMode::InvalidAllocationSize; + default: return FailureMode::Unknown; + } } - // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. - Kokkos::deep_copy( output, input ); - - value = 0; - - for ( size_t i3 = 0; i3 < N3; ++i3 ) - for ( size_t i2 = 0; i2 < N2; ++i2 ) - for ( size_t i1 = 0; i1 < N1; ++i1 ) - for ( size_t i0 = 0; i0 < N0; ++i0 ) - { - ++value; - ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + public: + HIPRawMemoryAllocationFailure(size_t arg_attempted_size, + hipError_t arg_error_code, + AllocationMechanism arg_mechanism) noexcept + : RawMemoryAllocationFailure( + arg_attempted_size, /* HIPSpace doesn't handle alignment? */ 1, + get_failure_mode(arg_error_code), arg_mechanism), + m_error_code(arg_error_code) {} + + void append_additional_error_information(std::ostream& o) const override { + if (m_error_code != hipSuccess) { + o << " The HIP allocation returned the error code \"\"" + << hipGetErrorName(m_error_code) << "\"."; + } } -#endif -} +}; -TEST_F(qthreads, view_aggregate) { -#if 0 - TestViewAggregate< Kokkos::Qthreads >(); -#endif -} +} // namespace Experimental +} // namespace Kokkos -TEST_F(qthreads, template_meta_functions) { -#if 0 - TestTemplateMetaFunctions< int, Kokkos::Qthreads >(); #endif -} - -} // namespace Test diff --git a/core/src/HIP/Kokkos_HIP_Instance.cpp b/core/src/HIP/Kokkos_HIP_Instance.cpp new file mode 100644 index 00000000000..1dcba0ff3e7 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Instance.cpp @@ -0,0 +1,373 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/*--------------------------------------------------------------------------*/ +/* Kokkos interfaces */ + +#include + +#include +#include +#include +#include + +/*--------------------------------------------------------------------------*/ +/* Standard 'C' libraries */ +#include + +/* Standard 'C++' libraries */ +#include +#include +#include +#include + +namespace Kokkos { +namespace Experimental { +namespace { +class HIPInternalDevices { + public: + enum { MAXIMUM_DEVICE_COUNT = 64 }; + struct hipDeviceProp_t m_hipProp[MAXIMUM_DEVICE_COUNT]; + int m_hipDevCount; + + HIPInternalDevices(); + + static HIPInternalDevices const &singleton(); +}; + +HIPInternalDevices::HIPInternalDevices() { + HIP_SAFE_CALL(hipGetDeviceCount(&m_hipDevCount)); + + if (m_hipDevCount > MAXIMUM_DEVICE_COUNT) { + Kokkos::abort( + "Sorry, you have more GPUs per node than we thought anybody would ever " + "have. Please report this to github.com/kokkos/kokkos."); + } + for (int i = 0; i < m_hipDevCount; ++i) { + HIP_SAFE_CALL(hipGetDeviceProperties(m_hipProp + i, i)); + } +} + +const HIPInternalDevices &HIPInternalDevices::singleton() { + static HIPInternalDevices self; + return self; +} +} // namespace + +namespace Impl { + +int HIPInternal::was_initialized = 0; +int HIPInternal::was_finalized = 0; +//---------------------------------------------------------------------------- + +void HIPInternal::print_configuration(std::ostream & /*s*/) const { + // FIXME_HIP + Kokkos::abort("print_configuration not implemented!\n"); + /*const HIPInternalDevices & dev_info = HIPInternalDevices::singleton(); + +#if defined( KOKKOS_ENABLE_HIP ) + s << "macro KOKKOS_ENABLE_HIP : defined" << std::endl ; +#endif +#if defined( __hcc_version__ ) + s << "macro __hcc_version__ = " << __hcc_version__ + << std::endl ; +#endif + + for ( int i = 0 ; i < dev_info.m_hipDevCount ; ++i ) { + s << "Kokkos::Experimental::HIP[ " << i << " ] " + << dev_info.m_hipProp[i].name + << " version " << (dev_info.m_hipProp[i].major) << "." << +dev_info.m_hipProp[i].minor + << ", Total Global Memory: " << +human_memory_size(dev_info.m_hipProp[i].totalGlobalMem) + << ", Shared Memory per Wavefront: " << +human_memory_size(dev_info.m_hipProp[i].sharedMemPerWavefront); if ( m_hipDev == +i ) s << " : Selected" ; s << std::endl ; + }*/ +} + +//---------------------------------------------------------------------------- + +HIPInternal::~HIPInternal() { + if (m_scratchSpace || m_scratchFlags) { + std::cerr << "Kokkos::Experimental::HIP ERROR: Failed to call " + "Kokkos::Experimental::HIP::finalize()" + << std::endl; + std::cerr.flush(); + } + + m_hipDev = -1; + m_hipArch = -1; + m_multiProcCount = 0; + m_maxWarpCount = 0; + m_maxSharedWords = 0; + m_maxShmemPerBlock = 0; + m_scratchSpaceCount = 0; + m_scratchFlagsCount = 0; + m_scratchSpace = 0; + m_scratchFlags = 0; +} + +int HIPInternal::verify_is_initialized(const char *const label) const { + if (m_hipDev < 0) { + std::cerr << "Kokkos::Experimental::HIP::" << label + << " : ERROR device not initialized" << std::endl; + } + return 0 <= m_hipDev; +} + +HIPInternal &HIPInternal::singleton() { + static HIPInternal *self = nullptr; + if (!self) { + self = new HIPInternal(); + } + return *self; +} + +void HIPInternal::initialize(int hip_device_id) { + if (was_finalized) + Kokkos::abort("Calling HIP::initialize after HIP::finalize is illegal\n"); + + if (is_initialized()) return; + + enum { WordSize = sizeof(size_type) }; + + if (!HostSpace::execution_space::impl_is_initialized()) { + const std::string msg( + "HIP::initialize ERROR : HostSpace::execution_space " + "is not initialized"); + Kokkos::Impl::throw_runtime_exception(msg); + } + + const HIPInternalDevices &dev_info = HIPInternalDevices::singleton(); + + const bool ok_init = 0 == m_scratchSpace || 0 == m_scratchFlags; + + // Need at least a GPU device + const bool ok_id = + 0 <= hip_device_id && hip_device_id < dev_info.m_hipDevCount; + + if (ok_init && ok_id) { + const struct hipDeviceProp_t &hipProp = dev_info.m_hipProp[hip_device_id]; + + m_hipDev = hip_device_id; + + hipSetDevice(m_hipDev); + + // FIXME_HIP for now always uses default stream + m_stream = 0; + + // number of multiprocessors + m_multiProcCount = hipProp.multiProcessorCount; + + //---------------------------------- + // Maximum number of warps, + // at most one warp per thread in a warp for reduction. + m_maxWarpCount = hipProp.maxThreadsPerBlock / Impl::HIPTraits::WarpSize; + if (HIPTraits::WarpSize < m_maxWarpCount) { + m_maxWarpCount = Impl::HIPTraits::WarpSize; + } + m_maxSharedWords = hipProp.sharedMemPerBlock / WordSize; + + //---------------------------------- + // Maximum number of blocks + m_maxBlock = hipProp.maxGridSize[0]; + + m_shmemPerSM = hipProp.maxSharedMemoryPerMultiProcessor; + m_maxShmemPerBlock = hipProp.sharedMemPerBlock; + m_maxThreadsPerSM = hipProp.maxThreadsPerMultiProcessor; + m_maxThreadsPerBlock = hipProp.maxThreadsPerBlock; + + //---------------------------------- + // Multiblock reduction uses scratch flags for counters + // and scratch space for partial reduction values. + // Allocate some initial space. This will grow as needed. + { + const unsigned reduce_block_count = + m_maxWarpCount * Impl::HIPTraits::WarpSize; + + (void)scratch_flags(reduce_block_count * 2 * sizeof(size_type)); + (void)scratch_space(reduce_block_count * 16 * sizeof(size_type)); + } + //---------------------------------- + } else { + std::ostringstream msg; + msg << "Kokkos::Experimental::HIP::initialize(" << hip_device_id + << ") FAILED"; + + if (!ok_init) { + msg << " : Already initialized"; + } + if (!ok_id) { + msg << " : Device identifier out of range " + << "[0.." << dev_info.m_hipDevCount - 1 << "]"; + } + Kokkos::Impl::throw_runtime_exception(msg.str()); + } + + // Init the array for used for arbitrarily sized atomics + // FIXME_HIP uncomment this when global variable works + // if (m_stream == 0) ::Kokkos::Impl::initialize_host_hip_lock_arrays(); +} + +//---------------------------------------------------------------------------- + +typedef Kokkos::Experimental::HIP::size_type + ScratchGrain[Impl::HIPTraits::WarpSize]; +enum { sizeScratchGrain = sizeof(ScratchGrain) }; + +Kokkos::Experimental::HIP::size_type *HIPInternal::scratch_space( + const Kokkos::Experimental::HIP::size_type size) { + if (verify_is_initialized("scratch_space") && + m_scratchSpaceCount * sizeScratchGrain < size) { + m_scratchSpaceCount = (size + sizeScratchGrain - 1) / sizeScratchGrain; + + typedef Kokkos::Impl::SharedAllocationRecord + Record; + + static Record *const r = Record::allocate( + Kokkos::Experimental::HIPSpace(), "InternalScratchSpace", + (sizeScratchGrain * m_scratchSpaceCount)); + + Record::increment(r); + + m_scratchSpace = reinterpret_cast(r->data()); + } + + return m_scratchSpace; +} + +Kokkos::Experimental::HIP::size_type *HIPInternal::scratch_flags( + const Kokkos::Experimental::HIP::size_type size) { + if (verify_is_initialized("scratch_flags") && + m_scratchFlagsCount * sizeScratchGrain < size) { + m_scratchFlagsCount = (size + sizeScratchGrain - 1) / sizeScratchGrain; + + typedef Kokkos::Impl::SharedAllocationRecord + Record; + + Record *const r = Record::allocate( + Kokkos::Experimental::HIPSpace(), "InternalScratchFlags", + (sizeScratchGrain * m_scratchFlagsCount)); + + Record::increment(r); + + m_scratchFlags = reinterpret_cast(r->data()); + + hipMemset(m_scratchFlags, 0, m_scratchFlagsCount * sizeScratchGrain); + } + + return m_scratchFlags; +} + +//---------------------------------------------------------------------------- + +void HIPInternal::finalize() { + HIP().fence(); + was_finalized = 1; + if (0 != m_scratchSpace || 0 != m_scratchFlags) { + typedef Kokkos::Impl::SharedAllocationRecord + RecordHIP; + + RecordHIP::decrement(RecordHIP::get_record(m_scratchFlags)); + RecordHIP::decrement(RecordHIP::get_record(m_scratchSpace)); + + m_hipDev = -1; + m_hipArch = -1; + m_multiProcCount = 0; + m_maxWarpCount = 0; + m_maxBlock = 0; + m_maxSharedWords = 0; + m_maxShmemPerBlock = 0; + m_scratchSpaceCount = 0; + m_scratchFlagsCount = 0; + m_scratchSpace = 0; + m_scratchFlags = 0; + } +} + +//---------------------------------------------------------------------------- + +Kokkos::Experimental::HIP::size_type hip_internal_maximum_warp_count() { + return HIPInternal::singleton().m_maxWarpCount; +} + +Kokkos::Experimental::HIP::size_type hip_internal_maximum_grid_count() { + return HIPInternal::singleton().m_maxBlock; +} + +Kokkos::Experimental::HIP::size_type *hip_internal_scratch_space( + const Kokkos::Experimental::HIP::size_type size) { + return HIPInternal::singleton().scratch_space(size); +} + +Kokkos::Experimental::HIP::size_type *hip_internal_scratch_flags( + const Kokkos::Experimental::HIP::size_type size) { + return HIPInternal::singleton().scratch_flags(size); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { +void hip_device_synchronize() { HIP_SAFE_CALL(hipDeviceSynchronize()); } + +void hip_internal_error_throw(hipError_t e, const char *name, const char *file, + const int line) { + std::ostringstream out; + out << name << " error( " << hipGetErrorName(e) + << "): " << hipGetErrorString(e); + if (file) { + out << " " << file << ":" << line; + } + throw_runtime_exception(out.str()); +} +} // namespace Impl +} // namespace Kokkos diff --git a/core/src/HIP/Kokkos_HIP_Instance.hpp b/core/src/HIP/Kokkos_HIP_Instance.hpp new file mode 100644 index 00000000000..c66fb2776f5 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Instance.hpp @@ -0,0 +1,141 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/*--------------------------------------------------------------------------*/ + +#ifndef KOKKOS_HIP_INSTANCE_HPP +#define KOKKOS_HIP_INSTANCE_HPP + +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +struct HIPTraits { + static int constexpr WarpSize = 64; + static int constexpr WarpIndexShift = 6; /* WarpSize == 1 << WarpShift*/ + + static int constexpr ConstantMemoryUsage = 0x008000; /* 32k bytes */ + static int constexpr ConstantMemoryUseThreshold = 0x000200; /* 512 bytes */ +}; + +//---------------------------------------------------------------------------- + +HIP::size_type hip_internal_maximum_warp_count(); +HIP::size_type hip_internal_maximum_grid_count(); + +HIP::size_type *hip_internal_scratch_space(const HIP::size_type size); +HIP::size_type *hip_internal_scratch_flags(const HIP::size_type size); + +//---------------------------------------------------------------------------- + +class HIPInternal { + private: + HIPInternal(const HIPInternal &); + HIPInternal &operator=(const HIPInternal &); + + public: + using size_type = ::Kokkos::Experimental::HIP::size_type; + + int m_hipDev; + int m_hipArch; + unsigned m_multiProcCount; + unsigned m_maxWarpCount; + unsigned m_maxBlock; + unsigned m_maxSharedWords; + int m_shmemPerSM; + int m_maxShmemPerBlock; + int m_maxThreadsPerSM; + int m_maxThreadsPerBlock; + size_type m_scratchSpaceCount; + size_type m_scratchFlagsCount; + size_type *m_scratchSpace; + size_type *m_scratchFlags; + + hipStream_t m_stream; + + static int was_initialized; + static int was_finalized; + + static HIPInternal &singleton(); + + int verify_is_initialized(const char *const label) const; + + int is_initialized() const { + return m_hipDev >= 0; + } // 0 != m_scratchSpace && 0 != m_scratchFlags ; } + + void initialize(int hip_device_id); + void finalize(); + + void print_configuration(std::ostream &) const; + + ~HIPInternal(); + + HIPInternal() + : m_hipDev(-1), + m_hipArch(-1), + m_multiProcCount(0), + m_maxWarpCount(0), + m_maxBlock(0), + m_maxSharedWords(0), + m_shmemPerSM(0), + m_maxShmemPerBlock(0), + m_maxThreadsPerSM(0), + m_maxThreadsPerBlock(0), + m_scratchSpaceCount(0), + m_scratchFlagsCount(0), + m_scratchSpace(0), + m_scratchFlags(0) {} + + size_type *scratch_space(const size_type size); + size_type *scratch_flags(const size_type size); +}; + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/core/src/HIP/Kokkos_HIP_KernelLaunch.cpp b/core/src/HIP/Kokkos_HIP_KernelLaunch.cpp new file mode 100644 index 00000000000..7c8582ef0ea --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_KernelLaunch.cpp @@ -0,0 +1,72 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +void *hip_resize_scratch_space(std::int64_t bytes, bool force_shrink) { + static void *ptr = nullptr; + static std::int64_t current_size = 0; + if (bytes > current_size) { + current_size = bytes; + if (ptr) Kokkos::kokkos_free<::Kokkos::Experimental::HIPSpace>(ptr); + ptr = Kokkos::kokkos_malloc( + "HIPSpace::ScratchMemory", current_size); + } + if ((bytes < current_size) && (force_shrink)) { + current_size = bytes; + Kokkos::kokkos_free<::Kokkos::Experimental::HIPSpace>(ptr); + ptr = Kokkos::kokkos_malloc( + "HIPSpace::ScratchMemory", current_size); + } + return ptr; +} +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos diff --git a/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp b/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp new file mode 100644 index 00000000000..5c19a3e0da5 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_KernelLaunch.hpp @@ -0,0 +1,212 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIP_KERNEL_LAUNCH_HPP +#define KOKKOS_HIP_KERNEL_LAUNCH_HPP + +#include + +#if defined(__HIPCC__) + +#include +#include +#include + +// FIXME_HIP cannot use global variable on the device with ROCm 2.9 +//__device__ __constant__ unsigned long kokkos_impl_hip_constant_memory_buffer +// [Kokkos::Experimental::Impl::HIPTraits::ConstantMemoryUsage / +// sizeof(unsigned long)]; + +namespace Kokkos { +namespace Experimental { +template +inline __device__ T *kokkos_impl_hip_shared_memory() { + extern __shared__ HIPSpace::size_type sh[]; + return (T *)sh; +} +} // namespace Experimental +} // namespace Kokkos + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +void *hip_resize_scratch_space(std::int64_t bytes, bool force_shrink = false); + +template +__global__ static void hip_parallel_launch_constant_memory() { + __device__ __constant__ unsigned long kokkos_impl_hip_constant_memory_buffer + [Kokkos::Experimental::Impl::HIPTraits::ConstantMemoryUsage / + sizeof(unsigned long)]; + + const DriverType &driver = *(reinterpret_cast( + kokkos_impl_hip_constant_memory_buffer)); + + driver(); +} + +template +__global__ static void hip_parallel_launch_local_memory( + const DriverType driver) { + driver(); +} + +template +__global__ __launch_bounds__( + maxTperB, + minBperSM) static void hip_parallel_launch_local_memory(const DriverType + driver) { + driver(); +} + +enum class HIPLaunchMechanism : unsigned { + Default = 0, + ConstantMemory = 1, + GlobalMemory = 2, + LocalMemory = 4 +}; + +constexpr inline HIPLaunchMechanism operator|(HIPLaunchMechanism p1, + HIPLaunchMechanism p2) { + return static_cast(static_cast(p1) | + static_cast(p2)); +} +constexpr inline HIPLaunchMechanism operator&(HIPLaunchMechanism p1, + HIPLaunchMechanism p2) { + return static_cast(static_cast(p1) & + static_cast(p2)); +} + +template +struct HIPDispatchProperties { + HIPLaunchMechanism launch_mechanism = l; +}; + +template , + HIPLaunchMechanism LaunchMechanism = HIPLaunchMechanism::LocalMemory> +struct HIPParallelLaunch; + +template +struct HIPParallelLaunch< + DriverType, Kokkos::LaunchBounds, + HIPLaunchMechanism::LocalMemory> { + inline HIPParallelLaunch(const DriverType &driver, const dim3 &grid, + const dim3 &block, const int shmem, + const HIPInternal *hip_instance, + const bool /*prefer_shmem*/) { + if ((grid.x != 0) && ((block.x * block.y * block.z) != 0)) { + if (hip_instance->m_maxShmemPerBlock < shmem) { + Kokkos::Impl::throw_runtime_exception( + "HIPParallelLaunch FAILED: shared memory request is too large"); + } + + // Invoke the driver function on the device + printf("%i %i %i | %i %i %i | %i\n", grid.x, grid.y, grid.z, block.x, + block.y, block.z, shmem); + printf("Pre Launch Error: %s\n", hipGetErrorName(hipGetLastError())); + + hipLaunchKernelGGL( + (hip_parallel_launch_local_memory), + grid, block, shmem, hip_instance->m_stream, driver); + + Kokkos::Experimental::HIP().fence(); + printf("Post Launch Error: %s\n", hipGetErrorName(hipGetLastError())); +#if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) + HIP_SAFE_CALL(hipGetLastError()); + Kokkos::Experimental::HIP().fence(); +#endif + } + } + + static hipFuncAttributes get_hip_func_attributes() { + hipFuncAttributes attr; + hipFuncGetAttributes( + &attr, hip_parallel_launch_local_memory); + return attr; + } +}; + +template +struct HIPParallelLaunch, + HIPLaunchMechanism::LocalMemory> { + inline HIPParallelLaunch(const DriverType &driver, const dim3 &grid, + const dim3 &block, const int shmem, + const HIPInternal *hip_instance, + const bool /*prefer_shmem*/) { + if ((grid.x != 0) && ((block.x * block.y * block.z) != 0)) { + if (hip_instance->m_maxShmemPerBlock < shmem) { + Kokkos::Impl::throw_runtime_exception(std::string( + "HIPParallelLaunch FAILED: shared memory request is too large")); + } + + // Invoke the driver function on the device + hipLaunchKernelGGL(hip_parallel_launch_local_memory, grid, + block, shmem, hip_instance->m_stream, driver); + + Kokkos::Experimental::HIP().fence(); +#if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) + HIP_SAFE_CALL(hipGetLastError()); + Kokkos::Experimental::HIP().fence(); +#endif + } + } + + static hipFuncAttributes get_hip_func_attributes() { + hipFuncAttributes attr; + hipFuncGetAttributes(&attr, + reinterpret_cast( + &hip_parallel_launch_local_memory)); + return attr; + } +}; +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif + +#endif diff --git a/core/src/HIP/Kokkos_HIP_Locks.cpp b/core/src/HIP/Kokkos_HIP_Locks.cpp new file mode 100644 index 00000000000..0a34ed505b7 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Locks.cpp @@ -0,0 +1,118 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include +#include +#include + +#include + +#include + +#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE +__device__ __constant__ HIPLockArrays g_device_hip_lock_arrays = {nullptr, + nullptr, 0}; +#endif + +namespace Kokkos { + +namespace { + +__global__ void init_lock_array_kernel_atomic() { + unsigned i = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + if (i < KOKKOS_IMPL_HIP_SPACE_ATOMIC_MASK + 1) { + g_device_hip_lock_arrays.atomic[i] = 0; + } +} + +__global__ void init_lock_array_kernel_threadid(int N) { + unsigned i = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + if (i < static_cast(N)) { + g_device_hip_lock_arrays.scratch[i] = 0; + } +} + +} // namespace + +namespace Impl { + +HIPLockArrays g_host_hip_lock_arrays = {nullptr, nullptr, 0}; + +void initialize_host_hip_lock_arrays() { + if (g_host_hip_lock_arrays.atomic != nullptr) return; + HIP_SAFE_CALL(hipMalloc( + &g_host_hip_lock_arrays.atomic, + sizeof(std::int32_t) * (KOKKOS_IMPL_HIP_SPACE_ATOMIC_MASK + 1))); + HIP_SAFE_CALL(hipMalloc( + &g_host_hip_lock_arrays.scratch, + sizeof(std::int32_t) * (::Kokkos::Experimental::HIP::concurrency()))); + + g_host_hip_lock_arrays.n = ::Kokkos::Experimental::HIP::concurrency(); + + KOKKOS_COPY_HIP_LOCK_ARRAYS_TO_DEVICE(); + hipLaunchKernelGGL(init_lock_array_kernel_atomic, + (KOKKOS_IMPL_HIP_SPACE_ATOMIC_MASK + 1 + 255) / 256, 256, + 0, 0); + hipLaunchKernelGGL(init_lock_array_kernel_threadid, + (::Kokkos::Experimental::HIP::concurrency() + 255) / 256, + 256, 0, 0, ::Kokkos::Experimental::HIP::concurrency()); +} + +void finalize_host_hip_lock_arrays() { + if (g_host_hip_lock_arrays.atomic == nullptr) return; + hipFree(g_host_hip_lock_arrays.atomic); + g_host_hip_lock_arrays.atomic = nullptr; + hipFree(g_host_hip_lock_arrays.scratch); + g_host_hip_lock_arrays.scratch = nullptr; + g_host_hip_lock_arrays.n = 0; +#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE + KOKKOS_COPY_HIP_LOCK_ARRAYS_TO_DEVICE(); +#endif +} + +} // namespace Impl + +} // namespace Kokkos diff --git a/core/src/HIP/Kokkos_HIP_Locks.hpp b/core/src/HIP/Kokkos_HIP_Locks.hpp new file mode 100644 index 00000000000..fb6728ea14a --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Locks.hpp @@ -0,0 +1,174 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIP_LOCKS_HPP +#define KOKKOS_HIP_LOCKS_HPP + +#include + +#include + +#include + +// FIXME_HIP We cannot use global variables defined in a namespace + +struct HIPLockArrays { + std::int32_t* atomic; + std::int32_t* scratch; + std::int32_t n; +}; + +/// \brief This global variable in Host space is the central definition +/// of these arrays. +extern HIPLockArrays g_host_hip_lock_arrays; + +namespace Kokkos { +namespace Impl { + +/// \brief After this call, the g_host_hip_lock_arrays variable has +/// valid, initialized arrays. +/// +/// This call is idempotent. +void initialize_host_hip_lock_arrays(); + +/// \brief After this call, the g_host_hip_lock_arrays variable has +/// all null pointers, and all array memory has been freed. +/// +/// This call is idempotent. +void finalize_host_hip_lock_arrays(); + +} // namespace Impl +} // namespace Kokkos + +#if defined(__HIPCC__) + +/// \brief This global variable in HIP space is what kernels use +/// to get access to the lock arrays. +/// +/// When relocatable device code is enabled, there can be one single +/// instance of this global variable for the entire executable, +/// whose definition will be in Kokkos_HIP_Locks.cpp (and whose declaration +/// here must then be extern). +/// This one instance will be initialized by initialize_host_HIP_lock_arrays +/// and need not be modified afterwards. +/// +/// When relocatable device code is disabled, an instance of this variable +/// will be created in every translation unit that sees this header file. +/// Since the Kokkos_HIP_Locks.cpp translation unit cannot initialize the +/// instances in other translation units, we must update this HIP global +/// variable based on the Host global variable prior to running any kernels +/// that will use it. +/// That is the purpose of the KOKKOS_ENSURE_HIP_LOCK_ARRAYS_ON_DEVICE macro. +__device__ +#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE + __constant__ extern +#endif + HIPLockArrays g_device_hip_lock_arrays; + +#define KOKKOS_IMPL_HIP_SPACE_ATOMIC_MASK 0x1FFFF + +namespace Kokkos { +namespace Impl { + +/// \brief Acquire a lock for the address +/// +/// This function tries to acquire the lock for the hash value derived +/// from the provided ptr. If the lock is successfully acquired the +/// function returns true. Otherwise it returns false. +__device__ inline bool lock_address_hip_space(void* ptr) { + auto offset = reinterpret_cast(ptr); + offset = offset >> 2; + offset = offset & KOKKOS_IMPL_HIP_SPACE_ATOMIC_MASK; + return (0 == atomicCAS(&g_device_hip_lock_arrays.atomic[offset], 0, 1)); +} + +/// \brief Release lock for the address +/// +/// This function releases the lock for the hash value derived +/// from the provided ptr. This function should only be called +/// after previously successfully aquiring a lock with +/// lock_address. +__device__ inline void unlock_address_hip_space(void* ptr) { + auto offset = reinterpret_cast(ptr); + offset = offset >> 2; + offset = offset & KOKKOS_IMPL_HIP_SPACE_ATOMIC_MASK; + atomicExch(&g_device_hip_lock_arrays.atomic[offset], 0); +} + +} // namespace Impl +} // namespace Kokkos + +// Make lock_array_copied an explicit translation unit scope thingy +namespace Kokkos { +namespace Impl { +namespace { +static int lock_array_copied = 0; +inline int eliminate_warning_for_lock_array() { return lock_array_copied; } +} // namespace +} // namespace Impl +} // namespace Kokkos + +/* Dan Ibanez: it is critical that this code be a macro, so that it will + capture the right address for g_device_hip_lock_arrays! + putting this in an inline function will NOT do the right thing! */ +#define KOKKOS_COPY_HIP_LOCK_ARRAYS_TO_DEVICE() \ + { \ + if (::Kokkos::Impl::lock_array_copied == 0) { \ + HIP_SAFE_CALL(hipMemcpyToSymbol(HIP_SYMBOL(g_device_hip_lock_arrays), \ + &g_host_hip_lock_arrays, \ + sizeof(HIPLockArrays))); \ + } \ + lock_array_copied = 1; \ + } + +#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE +#define KOKKOS_ENSURE_HIP_LOCK_ARRAYS_ON_DEVICE() +#else +#define KOKKOS_ENSURE_HIP_LOCK_ARRAYS_ON_DEVICE() \ + KOKKOS_COPY_HIP_LOCK_ARRAYS_TO_DEVICE() +#endif + +#endif /* defined( __HIPCC__ ) */ + +#endif /* #ifndef KOKKOS_HIP_LOCKS_HPP */ diff --git a/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp b/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp new file mode 100644 index 00000000000..7a6161346c7 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Parallel_MDRange.hpp @@ -0,0 +1,411 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIP_PARALLEL_MDRANGE_HPP +#define KOKKOS_HIP_PARALLEL_MDRANGE_HPP + +#include +#include +#include +#include +#include + +namespace Kokkos { +namespace Impl { +// ParallelFor +template +class ParallelFor, + Kokkos::Experimental::HIP> { + public: + using Policy = Kokkos::MDRangePolicy; + + private: + using array_index_type = typename Policy::array_index_type; + using index_type = typename Policy::index_type; + using LaunchBounds = typename Policy::launch_bounds; + + const FunctorType m_functor; + const Policy m_policy; + + ParallelFor() = delete; + ParallelFor& operator=(ParallelFor const&) = delete; + + public: + inline __device__ void operator()(void) const { + Kokkos::Impl::DeviceIterateTile(m_policy, + m_functor) + .exec_range(); + } + + inline void execute() const { + if (m_policy.m_num_tiles == 0) return; + array_index_type const maxblocks = static_cast( + m_policy.space().impl_internal_space_instance()->m_maxBlock); + if (Policy::rank == 2) { + dim3 const block(m_policy.m_tile[0], m_policy.m_tile[1], 1); + dim3 const grid( + std::min((m_policy.m_upper[0] - m_policy.m_lower[0] + block.x - 1) / + block.x, + maxblocks), + std::min((m_policy.m_upper[1] - m_policy.m_lower[1] + block.y - 1) / + block.y, + maxblocks), + 1); + Kokkos::Experimental::Impl::HIPParallelLaunch( + *this, grid, block, 0, + m_policy.space().impl_internal_space_instance(), false); + } else if (Policy::rank == 3) { + dim3 const block(m_policy.m_tile[0], m_policy.m_tile[1], + m_policy.m_tile[2]); + dim3 const grid( + std::min((m_policy.m_upper[0] - m_policy.m_lower[0] + block.x - 1) / + block.x, + maxblocks), + std::min((m_policy.m_upper[1] - m_policy.m_lower[1] + block.y - 1) / + block.y, + maxblocks), + std::min((m_policy.m_upper[2] - m_policy.m_lower[2] + block.z - 1) / + block.z, + maxblocks)); + Kokkos::Experimental::Impl::HIPParallelLaunch( + *this, grid, block, 0, + m_policy.space().impl_internal_space_instance(), false); + } else if (Policy::rank == 4) { + // id0,id1 encoded within hipThreadIdx_x; id2 to hipThreadIdx_y; id3 to + // hipThreadIdx_z + dim3 const block(m_policy.m_tile[0] * m_policy.m_tile[1], + m_policy.m_tile[2], m_policy.m_tile[3]); + dim3 const grid( + std::min(static_cast(m_policy.m_tile_end[0] * + m_policy.m_tile_end[1]), + static_cast(maxblocks)), + std::min((m_policy.m_upper[2] - m_policy.m_lower[2] + block.y - 1) / + block.y, + maxblocks), + std::min((m_policy.m_upper[3] - m_policy.m_lower[3] + block.z - 1) / + block.z, + maxblocks)); + Kokkos::Experimental::Impl::HIPParallelLaunch( + *this, grid, block, 0, + m_policy.space().impl_internal_space_instance(), false); + } else if (Policy::rank == 5) { + // id0,id1 encoded within hipThreadIdx_x; id2,id3 to hipThreadIdx_y; id4 + // to hipThreadIdx_z + dim3 const block(m_policy.m_tile[0] * m_policy.m_tile[1], + m_policy.m_tile[2] * m_policy.m_tile[3], + m_policy.m_tile[4]); + dim3 const grid( + std::min(static_cast(m_policy.m_tile_end[0] * + m_policy.m_tile_end[1]), + static_cast(maxblocks)), + std::min(static_cast(m_policy.m_tile_end[2] * + m_policy.m_tile_end[3]), + static_cast(maxblocks)), + std::min((m_policy.m_upper[4] - m_policy.m_lower[4] + block.z - 1) / + block.z, + maxblocks)); + Kokkos::Experimental::Impl::HIPParallelLaunch( + *this, grid, block, 0, + m_policy.space().impl_internal_space_instance(), false); + } else if (Policy::rank == 6) { + // id0,id1 encoded within hipThreadIdx_x; id2,id3 to hipThreadIdx_y; + // id4,id5 to hipThreadIdx_z + dim3 const block(m_policy.m_tile[0] * m_policy.m_tile[1], + m_policy.m_tile[2] * m_policy.m_tile[3], + m_policy.m_tile[4] * m_policy.m_tile[5]); + dim3 const grid(std::min(static_cast(m_policy.m_tile_end[0] * + m_policy.m_tile_end[1]), + static_cast(maxblocks)), + std::min(static_cast(m_policy.m_tile_end[2] * + m_policy.m_tile_end[3]), + static_cast(maxblocks)), + std::min(static_cast(m_policy.m_tile_end[4] * + m_policy.m_tile_end[5]), + static_cast(maxblocks))); + Kokkos::Experimental::Impl::HIPParallelLaunch( + *this, grid, block, 0, + m_policy.space().impl_internal_space_instance(), false); + } else { + printf("Kokkos::MDRange Error: Exceeded rank bounds with HIP\n"); + Kokkos::abort("Aborting"); + } + + } // end execute + + ParallelFor(FunctorType const& arg_functor, Policy const& arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} +}; + +// ParallelReduce +template +class ParallelReduce, ReducerType, + Kokkos::Experimental::HIP> { + public: + using Policy = Kokkos::MDRangePolicy; + + private: + using array_index_type = typename Policy::array_index_type; + using index_type = typename Policy::index_type; + + using WorkTag = typename Policy::work_tag; + using Member = typename Policy::member_type; + using LaunchBounds = typename Policy::launch_bounds; + + using ReducerConditional = + Kokkos::Impl::if_c::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + typename Kokkos::Impl::if_c::value, + WorkTag, void>::type; + + using ValueTraits = + Kokkos::Impl::FunctorValueTraits; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + + public: + using pointer_type = typename ValueTraits::pointer_type; + using value_type = typename ValueTraits::value_type; + using reference_type = typename ValueTraits::reference_type; + using functor_type = FunctorType; + using size_type = Experimental::HIP::size_type; + + // Algorithmic constraints: blockSize is a power of two AND hipBlockDim_y == + // hipBlockDim_z == 1 + + const FunctorType m_functor; + const Policy m_policy; // used for workrange and nwork + const ReducerType m_reducer; + const pointer_type m_result_ptr; + const bool m_result_ptr_device_accessible; + size_type* m_scratch_space; + size_type* m_scratch_flags; + + using DeviceIteratePattern = typename Kokkos::Impl::Reduce::DeviceIterateTile< + Policy::rank, Policy, FunctorType, WorkTag, reference_type>; + + // Shall we use the shfl based reduction or not (only use it for static sized + // types of more than 128bit + enum { + UseShflReduction = ((sizeof(value_type) > 2 * sizeof(double)) && + (ValueTraits::StaticValueSize != 0)) + }; + // Some crutch to do function overloading + private: + using DummyShflReductionType = double; + using DummySHMEMReductionType = int; + + public: + inline __device__ void exec_range(reference_type update) const { + DeviceIteratePattern(m_policy, m_functor, update).exec_range(); + } + + inline __device__ void operator()(void) const { + const integral_nonzero_constant + word_count(ValueTraits::value_size( + ReducerConditional::select(m_functor, m_reducer)) / + sizeof(size_type)); + + { + reference_type value = ValueInit::init( + ReducerConditional::select(m_functor, m_reducer), + Experimental::kokkos_impl_hip_shared_memory() + + hipThreadIdx_y * word_count.value); + + // Number of blocks is bounded so that the reduction can be limited to two + // passes. Each thread block is given an approximately equal amount of + // work to perform. Accumulate the values for this block. The accumulation + // ordering does not match the final pass, but is arithmatically + // equivalent. + + this->exec_range(value); + } + + // Reduce with final value at hipBlockDim_y - 1 location. + // Problem: non power-of-two blockDim + if (::Kokkos::Impl::hip_single_inter_block_reduce_scan< + false, ReducerTypeFwd, WorkTagFwd>( + ReducerConditional::select(m_functor, m_reducer), hipBlockIdx_x, + hipGridDim_x, + Experimental::kokkos_impl_hip_shared_memory(), + m_scratch_space, m_scratch_flags)) { + // This is the final block with the final result at the final threads' + // location + size_type* const shared = + Experimental::kokkos_impl_hip_shared_memory() + + (hipBlockDim_y - 1) * word_count.value; + size_type* const global = m_result_ptr_device_accessible + ? reinterpret_cast(m_result_ptr) + : m_scratch_space; + + if (hipThreadIdx_y == 0) { + Kokkos::Impl::FunctorFinal::final( + ReducerConditional::select(m_functor, m_reducer), shared); + } + + if (Experimental::Impl::HIPTraits::WarpSize < word_count.value) { + __syncthreads(); + } + + for (unsigned i = hipThreadIdx_y; i < word_count.value; + i += hipBlockDim_y) { + global[i] = shared[i]; + } + } + } + + // Determine block size constrained by shared memory: + // This is copy/paste from Kokkos_HIP_Parallel_Range + inline unsigned local_block_size(const FunctorType& f) { + unsigned n = Experimental::Impl::HIPTraits::WarpSize * 8; + int shmem_size = ::Kokkos::Impl::hip_single_inter_block_reduce_scan_shmem< + false, FunctorType, WorkTag>(f, n); + while ( + (n && + (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < + shmem_size)) || + (n > static_cast( + ::Kokkos::Experimental::Impl::hip_get_max_block_size< + ParallelReduce, LaunchBounds>(f, 1, shmem_size, 0)))) { + n >>= 1; + shmem_size = ::Kokkos::Impl::hip_single_inter_block_reduce_scan_shmem< + false, FunctorType, WorkTag>(f, n); + } + return n; + } + + inline void execute() { + const int nwork = m_policy.m_num_tiles; + if (nwork) { + int block_size = m_policy.m_prod_tile_dims; + // CONSTRAINT: Algorithm requires block_size >= product of tile dimensions + // Nearest power of two + int exponent_pow_two = std::ceil(std::log2(block_size)); + block_size = std::pow(2, exponent_pow_two); + int suggested_blocksize = local_block_size(m_functor); + + block_size = (block_size > suggested_blocksize) + ? block_size + : suggested_blocksize; // Note: block_size must be less + // than or equal to 512 + + m_scratch_space = + ::Kokkos::Experimental::Impl::hip_internal_scratch_space( + ValueTraits::value_size( + ReducerConditional::select(m_functor, m_reducer)) * + block_size /* block_size == max block_count */); + m_scratch_flags = + ::Kokkos::Experimental::Impl::hip_internal_scratch_flags( + sizeof(size_type)); + + // REQUIRED ( 1 , N , 1 ) + const dim3 block(1, block_size, 1); + // Required grid.x <= block.y + const dim3 grid(std::min(int(block.y), int(nwork)), 1, 1); + + const int shmem = + UseShflReduction + ? 0 + : ::Kokkos::Impl::hip_single_inter_block_reduce_scan_shmem< + false, FunctorType, WorkTag>(m_functor, block.y); + + Kokkos::Experimental::Impl::HIPParallelLaunch( + *this, grid, block, shmem, + m_policy.space().impl_internal_space_instance(), + false); // copy to device and execute + + if (!m_result_ptr_device_accessible) { + Experimental::HIP().fence(); + + if (m_result_ptr) { + const int size = ValueTraits::value_size( + ReducerConditional::select(m_functor, m_reducer)); + DeepCopy(m_result_ptr, + m_scratch_space, size); + } + } + } else { + if (m_result_ptr) { + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + m_result_ptr); + } + } + } + + template + ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, + const ViewType& arg_result, + typename std::enable_if::value, + void*>::type = NULL) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(InvalidType()), + m_result_ptr(arg_result.data()), + m_result_ptr_device_accessible( + MemorySpaceAccess::accessible), + m_scratch_space(0), + m_scratch_flags(0) {} + + ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, + const ReducerType& reducer) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(reducer), + m_result_ptr(reducer.view().data()), + m_result_ptr_device_accessible( + MemorySpaceAccess::accessible), + m_scratch_space(0), + m_scratch_flags(0) {} +}; +} // namespace Impl +} // namespace Kokkos + +#endif diff --git a/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp b/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp new file mode 100644 index 00000000000..a9c44606e4a --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Parallel_Range.hpp @@ -0,0 +1,655 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKO_HIP_PARALLEL_RANGE_HPP +#define KOKKO_HIP_PARALLEL_RANGE_HPP + +#include + +#if defined(__HIPCC__) + +#include +#include +#include + +namespace Kokkos { +namespace Impl { + +template +class ParallelFor, + Kokkos::Experimental::HIP> { + public: + using Policy = Kokkos::RangePolicy; + + private: + using Member = typename Policy::member_type; + using WorkTag = typename Policy::work_tag; + using LaunchBounds = typename Policy::launch_bounds; + + const FunctorType m_functor; + const Policy m_policy; + + ParallelFor() = delete; + ParallelFor& operator=(const ParallelFor&) = delete; + + template + inline __device__ + typename std::enable_if::value>::type + exec_range(const Member i) const { + m_functor(i); + } + + template + inline __device__ + typename std::enable_if::value>::type + exec_range(const Member i) const { + m_functor(TagType(), i); + } + + public: + using functor_type = FunctorType; + + inline __device__ void operator()(void) const { + const Member work_stride = hipBlockDim_y * hipGridDim_x; + const Member work_end = m_policy.end(); + + for (Member iwork = + m_policy.begin() + hipThreadIdx_y + hipBlockDim_y * hipBlockIdx_x; + iwork < work_end; + iwork = iwork < work_end - work_stride ? iwork + work_stride + : work_end) { + this->template exec_range(iwork); + } + } + + inline void execute() const { + const typename Policy::index_type nwork = m_policy.end() - m_policy.begin(); + + const int block_size = 256; // FIXME_HIP Choose block_size better + const dim3 block(1, block_size, 1); + const dim3 grid( + typename Policy::index_type((nwork + block.y - 1) / block.y), 1, 1); + + Kokkos::Experimental::Impl::HIPParallelLaunch( + *this, grid, block, 0, m_policy.space().impl_internal_space_instance(), + false); + } + + ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} +}; + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template +class ParallelReduce, ReducerType, + Kokkos::Experimental::HIP> { + public: + using Policy = Kokkos::RangePolicy; + + private: + using WorkRange = typename Policy::WorkRange; + using WorkTag = typename Policy::work_tag; + using Member = typename Policy::member_type; + using LaunchBounds = typename Policy::launch_bounds; + + using ReducerConditional = + Kokkos::Impl::if_c::value, + FunctorType, ReducerType>; + using ReducerTypeFwd = typename ReducerConditional::type; + using WorkTagFwd = + typename Kokkos::Impl::if_c::value, + WorkTag, void>::type; + + using ValueTraits = + Kokkos::Impl::FunctorValueTraits; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueJoin = Kokkos::Impl::FunctorValueJoin; + + public: + using pointer_type = typename ValueTraits::pointer_type; + using value_type = typename ValueTraits::value_type; + using reference_type = typename ValueTraits::reference_type; + using functor_type = FunctorType; + using size_type = Kokkos::Experimental::HIP::size_type; + using index_type = typename Policy::index_type; + + // Algorithmic constraints: blockSize is a power of two AND hipBlockDim_y == + // hipBlockDim_z == 1 + + const FunctorType m_functor; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + const bool m_result_ptr_device_accessible; + size_type* m_scratch_space = nullptr; + size_type* m_scratch_flags = nullptr; + + // Shall we use the shfl based reduction or not (only use it for static sized + // types of more than 128bit) + enum { + UseShflReduction = false + }; //((sizeof(value_type)>2*sizeof(double)) && ValueTraits::StaticValueSize) + //}; + // Some crutch to do function overloading + private: + using DummyShflReductionType = double; + using DummySHMEMReductionType = int; + + public: + // Make the exec_range calls call to Reduce::DeviceIterateTile + template + __device__ inline + typename std::enable_if::value>::type + exec_range(const Member& i, reference_type update) const { + m_functor(i, update); + } + + template + __device__ inline + typename std::enable_if::value>::type + exec_range(const Member& i, reference_type update) const { + m_functor(TagType(), i, update); + } + + __device__ inline void operator()() const { + const integral_nonzero_constant + word_count(ValueTraits::value_size( + ReducerConditional::select(m_functor, m_reducer)) / + sizeof(size_type)); + + { + reference_type value = ValueInit::init( + ReducerConditional::select(m_functor, m_reducer), + ::Kokkos::Experimental::kokkos_impl_hip_shared_memory() + + hipThreadIdx_y * word_count.value); + + // Number of blocks is bounded so that the reduction can be limited to two + // passes. Each thread block is given an approximately equal amount of + // work to perform. Accumulate the values for this block. The accumulation + // ordering does not match the final pass, but is arithmetically + // equivalent. + + const WorkRange range(m_policy, hipBlockIdx_x, hipGridDim_x); + + for (Member iwork = range.begin() + hipThreadIdx_y, + iwork_end = range.end(); + iwork < iwork_end; iwork += hipBlockDim_y) { + this->template exec_range(iwork, value); + } + } + + // Reduce with final value at hipBlockDim_y - 1 location. + if (hip_single_inter_block_reduce_scan( + ReducerConditional::select(m_functor, m_reducer), hipBlockIdx_x, + hipGridDim_x, + ::Kokkos::Experimental::kokkos_impl_hip_shared_memory(), + m_scratch_space, m_scratch_flags)) { + // This is the final block with the final result at the final threads' + // location + + size_type* const shared = + ::Kokkos::Experimental::kokkos_impl_hip_shared_memory() + + (hipBlockDim_y - 1) * word_count.value; + size_type* const global = m_result_ptr_device_accessible + ? reinterpret_cast(m_result_ptr) + : m_scratch_space; + + if (hipThreadIdx_y == 0) { + Kokkos::Impl::FunctorFinal::final( + ReducerConditional::select(m_functor, m_reducer), shared); + } + + if (::Kokkos::Experimental::Impl::HIPTraits::WarpSize < + word_count.value) { + __syncthreads(); + } + + for (unsigned i = hipThreadIdx_y; i < word_count.value; + i += hipBlockDim_y) { + global[i] = shared[i]; + } + } + } + + // Determine block size constrained by shared memory: + inline unsigned local_block_size(const FunctorType& f) { + // FIXME_HIP I don't know where 8 comes from + unsigned int n = ::Kokkos::Experimental::Impl::HIPTraits::WarpSize * 8; + int shmem_size = + hip_single_inter_block_reduce_scan_shmem( + f, n); + while ( + (n && + (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < + shmem_size)) || + (n > static_cast( + Kokkos::Experimental::Impl::hip_get_max_block_size< + ParallelReduce, LaunchBounds>(f, 1, shmem_size, 0)))) { + n >>= 1; + shmem_size = + hip_single_inter_block_reduce_scan_shmem( + f, n); + } + return n; + } + + inline void execute() { + const index_type nwork = m_policy.end() - m_policy.begin(); + if (nwork) { + const int block_size = local_block_size(m_functor); + + m_scratch_space = + ::Kokkos::Experimental::Impl::hip_internal_scratch_space( + ValueTraits::value_size( + ReducerConditional::select(m_functor, m_reducer)) * + block_size /* block_size == max block_count */); + m_scratch_flags = + ::Kokkos::Experimental::Impl::hip_internal_scratch_flags( + sizeof(size_type)); + + // REQUIRED ( 1 , N , 1 ) + const dim3 block(1, block_size, 1); + // Required grid.x <= block.y + const dim3 grid( + std::min(int(block.y), int((nwork + block.y - 1) / block.y)), 1, 1); + + const int shmem = + UseShflReduction + ? 0 + : hip_single_inter_block_reduce_scan_shmem(m_functor, + block.y); + + Kokkos::Experimental::Impl::HIPParallelLaunch( + *this, grid, block, shmem, + m_policy.space().impl_internal_space_instance(), + false); // copy to device and execute + + if (!m_result_ptr_device_accessible) { + ::Kokkos::Experimental::HIP().fence(); + + if (m_result_ptr) { + const int size = ValueTraits::value_size( + ReducerConditional::select(m_functor, m_reducer)); + DeepCopy( + m_result_ptr, m_scratch_space, size); + } + } + } else { + if (m_result_ptr) { + ValueInit::init(ReducerConditional::select(m_functor, m_reducer), + m_result_ptr); + } + } + } + + template + ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, + const ViewType& arg_result, + typename std::enable_if::value, + void*>::type = NULL) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(InvalidType()), + m_result_ptr(arg_result.data()), + m_result_ptr_device_accessible( + MemorySpaceAccess::accessible) {} + + ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, + const ReducerType& reducer) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(reducer), + m_result_ptr(reducer.view().data()), + m_result_ptr_device_accessible( + MemorySpaceAccess::accessible) {} +}; + +template +class ParallelScanHIPBase { + public: + using Policy = Kokkos::RangePolicy; + + protected: + using Member = typename Policy::member_type; + using WorkTag = typename Policy::work_tag; + using WorkRange = typename Policy::WorkRange; + using LaunchBounds = typename Policy::launch_bounds; + + using ValueTraits = Kokkos::Impl::FunctorValueTraits; + using ValueInit = Kokkos::Impl::FunctorValueInit; + using ValueOps = Kokkos::Impl::FunctorValueOps; + + public: + using pointer_type = typename ValueTraits::pointer_type; + using reference_type = typename ValueTraits::reference_type; + using functor_type = FunctorType; + using size_type = Kokkos::Experimental::HIP::size_type; + using index_type = typename Policy::index_type; + + protected: + // Algorithmic constraints: + // (a) hipBlockDim_y is a power of two + // (b) hipBlockDim_x == hipBlockDim_z == 1 + // (c) hipGridDim_x <= hipBlockDim_y * hipBlockDim_y + // (d) hipGridDim_y == hipGridDim_z == 1 + + const FunctorType m_functor; + const Policy m_policy; + size_type* m_scratch_space = nullptr; + size_type* m_scratch_flags = nullptr; + size_type m_final = false; + int m_grid_x = 0; + + private: + template + __device__ inline + typename std::enable_if::value>::type + exec_range(const Member& i, reference_type update, + const bool final_result) const { + m_functor(i, update, final_result); + } + + template + __device__ inline + typename std::enable_if::value>::type + exec_range(const Member& i, reference_type update, + const bool final_result) const { + m_functor(TagType(), i, update, final_result); + } + + //---------------------------------------- + + __device__ inline void initial(void) const { + const integral_nonzero_constant + word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); + + size_type* const shared_value = + Kokkos::Experimental::kokkos_impl_hip_shared_memory() + + word_count.value * hipThreadIdx_y; + + ValueInit::init(m_functor, shared_value); + + // Number of blocks is bounded so that the reduction can be limited to two + // passes. Each thread block is given an approximately equal amount of work + // to perform. Accumulate the values for this block. The accumulation + // ordering does not match the final pass, but is arithmetically equivalent. + + const WorkRange range(m_policy, hipBlockIdx_x, hipGridDim_x); + + for (Member iwork = range.begin() + hipThreadIdx_y, iwork_end = range.end(); + iwork < iwork_end; iwork += hipBlockDim_y) { + this->template exec_range( + iwork, ValueOps::reference(shared_value), false); + } + + // Reduce and scan, writing out scan of blocks' totals and block-groups' + // totals. Blocks' scan values are written to 'hipBlockIdx_x' location. + // Block-groups' scan values are at: i = ( j * hipBlockDim_y - 1 ) for i < + // hipGridDim_x + hip_single_inter_block_reduce_scan( + m_functor, hipBlockIdx_x, hipGridDim_x, + Kokkos::Experimental::kokkos_impl_hip_shared_memory(), + m_scratch_space, m_scratch_flags); + } + + //---------------------------------------- + + __device__ inline void final(void) const { + const integral_nonzero_constant + word_count(ValueTraits::value_size(m_functor) / sizeof(size_type)); + + // Use shared memory as an exclusive scan: { 0 , value[0] , value[1] , + // value[2] , ... } + size_type* const shared_data = + Kokkos::Experimental::kokkos_impl_hip_shared_memory(); + size_type* const shared_prefix = + shared_data + word_count.value * hipThreadIdx_y; + size_type* const shared_accum = + shared_data + word_count.value * (hipBlockDim_y + 1); + + // Starting value for this thread block is the previous block's total. + if (hipBlockIdx_x) { + size_type* const block_total = + m_scratch_space + word_count.value * (hipBlockIdx_x - 1); + for (unsigned i = hipThreadIdx_y; i < word_count.value; ++i) { + shared_accum[i] = block_total[i]; + } + } else if (0 == hipThreadIdx_y) { + ValueInit::init(m_functor, shared_accum); + } + + const WorkRange range(m_policy, hipBlockIdx_x, hipGridDim_x); + + for (typename Policy::member_type iwork_base = range.begin(); + iwork_base < range.end(); iwork_base += hipBlockDim_y) { + const typename Policy::member_type iwork = iwork_base + hipThreadIdx_y; + + __syncthreads(); // Don't overwrite previous iteration values until they + // are used + + ValueInit::init(m_functor, shared_prefix + word_count.value); + + // Copy previous block's accumulation total into thread[0] prefix and + // inclusive scan value of this block + for (unsigned i = hipThreadIdx_y; i < word_count.value; ++i) { + shared_data[i + word_count.value] = shared_data[i] = shared_accum[i]; + } + + // Make sure the write is seen by all threads + __threadfence_block(); + + // Call functor to accumulate inclusive scan value for this work item + const bool doWork = (iwork < range.end()); + if (doWork) { + this->template exec_range( + iwork, ValueOps::reference(shared_prefix + word_count.value), + false); + } + + // Scan block values into locations shared_data[1..hipBlockDim_y] + hip_intra_block_reduce_scan( + m_functor, + typename ValueTraits::pointer_type(shared_data + word_count.value)); + + { + size_type* const block_total = + shared_data + word_count.value * hipBlockDim_y; + for (unsigned i = hipThreadIdx_y; i < word_count.value; ++i) { + shared_accum[i] = block_total[i]; + } + } + + // Call functor with exclusive scan value + if (doWork) { + this->template exec_range( + iwork, ValueOps::reference(shared_prefix), true); + } + } + } + + public: + //---------------------------------------- + + __device__ inline void operator()(void) const { + if (!m_final) { + initial(); + } else { + final(); + } + } + + // Determine block size constrained by shared memory: + inline unsigned local_block_size(const FunctorType& f) { + // hipBlockDim_y must be power of two = 128 (2 warps) or 256 (4 warps) or + // 512 (8 warps) hipGridDim_x <= hipBlockDim_y * hipBlockDim_y + // + // TODO check best option + + unsigned n = Experimental::Impl::HIPTraits::WarpSize * 4; + while (n && static_cast(m_policy.space() + .impl_internal_space_instance() + ->m_maxShmemPerBlock) < + hip_single_inter_block_reduce_scan_shmem(f, n)) { + n >>= 1; + } + return n; + } + + inline void impl_execute() { + const index_type nwork = m_policy.end() - m_policy.begin(); + if (nwork) { + // FIXME_HIP we cannot choose it larger for large work sizes to work + // correctly, the unit tests fail with wrong results + const int gridMaxComputeCapability_2x = 0x01fff; + + // FIXME_HIP block sizes greater than 256 don't work correctly, + // the unit tests fail with wrong results + const int block_size = + std::min(static_cast(local_block_size(m_functor)), 256); + + const int grid_max = + std::min(block_size * block_size, gridMaxComputeCapability_2x); + + // At most 'max_grid' blocks: + const int max_grid = + std::min(grid_max, (nwork + block_size - 1) / block_size); + + // How much work per block: + const int work_per_block = (nwork + max_grid - 1) / max_grid; + + // How many block are really needed for this much work: + m_grid_x = (nwork + work_per_block - 1) / work_per_block; + + m_scratch_space = Kokkos::Experimental::Impl::hip_internal_scratch_space( + ValueTraits::value_size(m_functor) * m_grid_x); + m_scratch_flags = Kokkos::Experimental::Impl::hip_internal_scratch_flags( + sizeof(size_type) * 1); + + dim3 grid(m_grid_x, 1, 1); + dim3 block(1, block_size, 1); // REQUIRED DIMENSIONS ( 1 , N , 1 ) + const int shmem = ValueTraits::value_size(m_functor) * (block_size + 2); + + m_final = false; + Kokkos::Experimental::Impl::HIPParallelLaunch( + *this, grid, block, shmem, + m_policy.space().impl_internal_space_instance(), + false); // copy to device and execute + + m_final = true; + Kokkos::Experimental::Impl::HIPParallelLaunch( + *this, grid, block, shmem, + m_policy.space().impl_internal_space_instance(), + false); // copy to device and execute + } + } + + ParallelScanHIPBase(const FunctorType& arg_functor, const Policy& arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} +}; + +template +class ParallelScan, + Kokkos::Experimental::HIP> + : private ParallelScanHIPBase { + public: + using Base = ParallelScanHIPBase; + using Base::operator(); + + inline void execute() { Base::impl_execute(); } + + ParallelScan(const FunctorType& arg_functor, + const typename Base::Policy& arg_policy) + : Base(arg_functor, arg_policy) {} +}; + +//---------------------------------------------------------------------------- + +template +class ParallelScanWithTotal, + ReturnType, Kokkos::Experimental::HIP> + : private ParallelScanHIPBase { + public: + using Base = ParallelScanHIPBase; + using Base::operator(); + + ReturnType& m_returnvalue; + + inline void execute() { + Base::impl_execute(); + + const auto nwork = Base::m_policy.end() - Base::m_policy.begin(); + if (nwork) { + const int size = Base::ValueTraits::value_size(Base::m_functor); + DeepCopy( + &m_returnvalue, + Base::m_scratch_space + (Base::m_grid_x - 1) * size / sizeof(int), + size); + } + } + + ParallelScanWithTotal(const FunctorType& arg_functor, + const typename Base::Policy& arg_policy, + ReturnType& arg_returnvalue) + : Base(arg_functor, arg_policy), m_returnvalue(arg_returnvalue) {} +}; + +} // namespace Impl +} // namespace Kokkos + +#endif + +#endif diff --git a/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp b/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp new file mode 100644 index 00000000000..53097f36431 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Parallel_Team.hpp @@ -0,0 +1,562 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKO_HIP_PARALLEL_TEAM_HPP +#define KOKKO_HIP_PARALLEL_TEAM_HPP + +#include + +#if defined(__HIPCC__) + +#include +#include +#include +#include + +namespace Kokkos { +namespace Impl { +template +class TeamPolicyInternal + : public PolicyTraits { + public: + using execution_policy = TeamPolicyInternal; + + using traits = PolicyTraits; + + template + friend class TeamPolicyInternal; + + private: + static int constexpr MAX_WARP = 8; + + typename traits::execution_space m_space; + int m_league_size; + int m_team_size; + int m_vector_length; + int m_team_scratch_size[2]; + int m_thread_scratch_size[2]; + int m_chunk_size; + + public: + using execution_space = Kokkos::Experimental::HIP; + + template + TeamPolicyInternal(TeamPolicyInternal const& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_vector_length = p.m_vector_length; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + m_space = p.m_space; + } + + TeamPolicyInternal& operator=(TeamPolicyInternal const& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_vector_length = p.m_vector_length; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + m_space = p.m_space; + return *this; + } + + template + int team_size_max(FunctorType const& f, ParallelForTag const&) const { + using closure_type = + Impl::ParallelFor >; + hipFuncAttributes attr = ::Kokkos::Experimental::Impl::HIPParallelLaunch< + closure_type, + typename traits::launch_bounds>::get_hip_func_attributes(); + int const block_size = ::Kokkos::Experimental::Impl::hip_get_max_block_size< + FunctorType, typename traits::launch_bounds>( + space().impl_internal_space_instance(), attr, f, + static_cast(vector_length()), + static_cast(team_scratch_size(0)) + 2 * sizeof(double), + static_cast(thread_scratch_size(0)) + sizeof(double)); + return block_size / vector_length(); + } + + template + int team_size_recommended(FunctorType const& f, ParallelForTag const&) const { + typedef Impl::ParallelFor > + closure_type; + hipFuncAttributes attr = ::Kokkos::Experimental::Impl::HIPParallelLaunch< + closure_type, + typename traits::launch_bounds>::get_hip_func_attributes(); + int const block_size = ::Kokkos::Experimental::Impl::hip_get_opt_block_size< + FunctorType, typename traits::launch_bounds>( + space().impl_internal_space_instance(), attr, f, + static_cast(vector_length()), + static_cast(team_scratch_size(0)) + 2 * sizeof(double), + static_cast(thread_scratch_size(0)) + sizeof(double)); + return block_size / vector_length(); + } + + static int vector_length_max() { + return ::Kokkos::Experimental::Impl::HIPTraits::WarpSize; + } + + static int verify_requested_vector_length(int requested_vector_length) { + int test_vector_length = + std::min(requested_vector_length, vector_length_max()); + + // Allow only power-of-two vector_length + if (!(is_integral_power_of_two(test_vector_length))) { + int test_pow2 = 1; + for (int i = 0; i < 5; i++) { + test_pow2 = test_pow2 << 1; + if (test_pow2 > test_vector_length) { + break; + } + } + test_vector_length = test_pow2 >> 1; + } + + return test_vector_length; + } + + static int scratch_size_max(int level) { + return ( + level == 0 ? 1024 * 40 : // FIXME_HIP arbitrarily setting this to 48kB + 20 * 1024 * 1024); // FIXME_HIP arbitrarily setting this to 20MB + } + + int vector_length() const { return m_vector_length; } + + int team_size() const { return m_team_size; } + + int league_size() const { return m_league_size; } + + int scratch_size(int level, int team_size_ = -1) const { + if (team_size_ < 0) team_size_ = m_team_size; + return m_team_scratch_size[level] + + team_size_ * m_thread_scratch_size[level]; + } + + int team_scratch_size(int level) const { return m_team_scratch_size[level]; } + + int thread_scratch_size(int level) const { + return m_thread_scratch_size[level]; + } + + typename traits::execution_space space() const { return m_space; } + + TeamPolicyInternal() + : m_space(typename traits::execution_space()), + m_league_size(0), + m_team_size(-1), + m_vector_length(0), + m_team_scratch_size{0, 0}, + m_thread_scratch_size{0, 0}, + m_chunk_size(::Kokkos::Experimental::Impl::HIPTraits::WarpSize) {} + + /** \brief Specify league size, request team size */ + TeamPolicyInternal(const execution_space space_, int league_size_, + int team_size_request, int vector_length_request = 1) + : m_space(space_), + m_league_size(league_size_), + m_team_size(team_size_request), + m_vector_length(verify_requested_vector_length(vector_length_request)), + m_team_scratch_size{0, 0}, + m_thread_scratch_size{0, 0}, + m_chunk_size(::Kokkos::Experimental::Impl::HIPTraits::WarpSize) { + // Make sure league size is permissable + if (league_size_ >= + static_cast( + ::Kokkos::Experimental::Impl::hip_internal_maximum_grid_count())) + Impl::throw_runtime_exception( + "Requested too large league_size for TeamPolicy on HIP execution " + "space."); + + // Make sure total block size is permissable + if (m_team_size * m_vector_length > 1024) { + Impl::throw_runtime_exception( + std::string("Kokkos::TeamPolicy< HIP > the team size is too large. " + "Team size x vector length must be smaller than 1024.")); + } + } + + /** \brief Specify league size, request team size */ + TeamPolicyInternal(const execution_space space_, int league_size_, + const Kokkos::AUTO_t& /* team_size_request */, + int vector_length_request = 1) + : m_space(space_), + m_league_size(league_size_), + m_team_size(-1), + m_vector_length(verify_requested_vector_length(vector_length_request)), + m_team_scratch_size{0, 0}, + m_thread_scratch_size{0, 0}, + m_chunk_size(::Kokkos::Experimental::Impl::HIPTraits::WarpSize) { + // Make sure league size is permissable + if (league_size_ >= + static_cast( + ::Kokkos::Experimental::Impl::hip_internal_maximum_grid_count())) + Impl::throw_runtime_exception( + "Requested too large league_size for TeamPolicy on HIP execution " + "space."); + } + + TeamPolicyInternal(int league_size_, int team_size_request, + int vector_length_request = 1) + : m_space(typename traits::execution_space()), + m_league_size(league_size_), + m_team_size(team_size_request), + m_vector_length(verify_requested_vector_length(vector_length_request)), + m_team_scratch_size{0, 0}, + m_thread_scratch_size{0, 0}, + m_chunk_size(::Kokkos::Experimental::Impl::HIPTraits::WarpSize) { + // Make sure league size is permissable + if (league_size_ >= + static_cast( + ::Kokkos::Experimental::Impl::hip_internal_maximum_grid_count())) + Impl::throw_runtime_exception( + "Requested too large league_size for TeamPolicy on HIP execution " + "space."); + + // Make sure total block size is permissable + if (m_team_size * m_vector_length > 1024) { + Impl::throw_runtime_exception( + std::string("Kokkos::TeamPolicy< HIP > the team size is too large. " + "Team size x vector length must be smaller than 1024.")); + } + } + + TeamPolicyInternal(int league_size_, + const Kokkos::AUTO_t& /* team_size_request */, + int vector_length_request = 1) + : m_space(typename traits::execution_space()), + m_league_size(league_size_), + m_team_size(-1), + m_vector_length(verify_requested_vector_length(vector_length_request)), + m_team_scratch_size{0, 0}, + m_thread_scratch_size{0, 0}, + m_chunk_size(::Kokkos::Experimental::Impl::HIPTraits::WarpSize) { + // Make sure league size is permissable + if (league_size_ >= + static_cast( + ::Kokkos::Experimental::Impl::hip_internal_maximum_grid_count())) + Impl::throw_runtime_exception( + "Requested too large league_size for TeamPolicy on HIP execution " + "space."); + } + + int chunk_size() const { return m_chunk_size; } + + TeamPolicyInternal& set_chunk_size(typename traits::index_type chunk_size_) { + m_chunk_size = chunk_size_; + return *this; + } + + /** \brief set per team scratch size for a specific level of the scratch + * hierarchy */ + TeamPolicyInternal& set_scratch_size(int level, + PerTeamValue const& per_team) { + m_team_scratch_size[level] = per_team.value; + return *this; + } + + /** \brief set per thread scratch size for a specific level of the scratch + * hierarchy */ + TeamPolicyInternal& set_scratch_size(int level, + PerThreadValue const& per_thread) { + m_thread_scratch_size[level] = per_thread.value; + return *this; + } + + /** \brief set per thread and per team scratch size for a specific level of + * the scratch hierarchy */ + TeamPolicyInternal& set_scratch_size(int level, PerTeamValue const& per_team, + PerThreadValue const& per_thread) { + m_team_scratch_size[level] = per_team.value; + m_thread_scratch_size[level] = per_thread.value; + return *this; + } + + using member_type = Kokkos::Impl::HIPTeamMember; + + protected: + template + int internal_team_size_common(const FunctorType& f, + BlockSizeCallable&& block_size_callable) const { + using closure_type = ClosureType; + using functor_value_traits = + Impl::FunctorValueTraits; + + hipFuncAttributes attr = ::Kokkos::Experimental::Impl::HIPParallelLaunch< + closure_type, + typename traits::launch_bounds>::get_hip_func_attributes(); + const int block_size = std::forward(block_size_callable)( + space().impl_internal_space_instance(), attr, f, + static_cast(vector_length()), + static_cast(team_scratch_size(0)) + 2 * sizeof(double), + static_cast(thread_scratch_size(0)) + sizeof(double) + + ((functor_value_traits::StaticValueSize != 0) + ? 0 + : functor_value_traits::value_size(f))); + KOKKOS_ASSERT(block_size > 0); + + // Currently we require Power-of-2 team size for reductions. + int p2 = 1; + while (p2 <= block_size) p2 *= 2; + p2 /= 2; + return p2 / vector_length(); + } + + template + int internal_team_size_max(const FunctorType& f) const { + return internal_team_size_common( + f, ::Kokkos::Experimental::Impl::hip_get_max_block_size< + FunctorType, typename traits::launch_bounds>); + } + + template + int internal_team_size_recommended(const FunctorType& f) const { + return internal_team_size_common( + f, ::Kokkos::Experimental::Impl::hip_get_opt_block_size< + FunctorType, typename traits::launch_bounds>); + } +}; + +struct HIPLockArrays { + std::int32_t* atomic = nullptr; + std::int32_t* scratch = nullptr; + std::int32_t n = 0; +}; + +template +class ParallelFor, + Kokkos::Experimental::HIP> { + public: + using Policy = TeamPolicyInternal; + using functor_type = FunctorType; + using size_type = ::Kokkos::Experimental::HIP::size_type; + + private: + using Member = typename Policy::member_type; + using WorkTag = typename Policy::work_tag; + using LaunchBounds = typename Policy::launch_bounds; + + // Algorithmic constraints: hipBlockDim_y is a power of two AND hipBlockDim_y + // == hipBlockDim_z == 1 shared memory utilization: + // + // [ team reduce space ] + // [ team shared space ] + + FunctorType const m_functor; + Policy const m_policy; + size_type const m_league_size; + int m_team_size; + size_type const m_vector_size; + int m_shmem_begin; + int m_shmem_size; + void* m_scratch_ptr[2]; + int m_scratch_size[2]; + mutable HIPLockArrays hip_lock_arrays; + + template + __device__ inline + typename std::enable_if::value>::type + exec_team(const Member& member) const { + m_functor(member); + } + + template + __device__ inline + typename std::enable_if::value>::type + exec_team(const Member& member) const { + m_functor(TagType(), member); + } + + public: + __device__ inline void operator()(void) const { + // Iterate this block through the league + int64_t threadid = 0; + if (m_scratch_size[1] > 0) { + __shared__ int64_t base_thread_id; + if (hipThreadIdx_x == 0 && hipThreadIdx_y == 0) { + threadid = (hipBlockIdx_x * hipBlockDim_z + hipThreadIdx_z) % + (hip_lock_arrays.n / (hipBlockDim_x * hipBlockDim_y)); + threadid *= hipBlockDim_x * hipBlockDim_y; + int done = 0; + while (!done) { + done = (0 == atomicCAS(&hip_lock_arrays.scratch[threadid], 0, 1)); + if (!done) { + threadid += hipBlockDim_x * hipBlockDim_y; + if (int64_t(threadid + hipBlockDim_x * hipBlockDim_y) >= + int64_t(hip_lock_arrays.n)) + threadid = 0; + } + } + base_thread_id = threadid; + } + __syncthreads(); + threadid = base_thread_id; + } + + int const int_league_size = static_cast(m_league_size); + for (int league_rank = hipBlockIdx_x; league_rank < int_league_size; + league_rank += hipGridDim_x) { + this->template exec_team(typename Policy::member_type( + ::Kokkos::Experimental::kokkos_impl_hip_shared_memory(), + m_shmem_begin, m_shmem_size, + static_cast( + static_cast(m_scratch_ptr[1]) + + ptrdiff_t(threadid / (hipBlockDim_x * hipBlockDim_y)) * + m_scratch_size[1]), + m_scratch_size[1], league_rank, m_league_size)); + } + if (m_scratch_size[1] > 0) { + __syncthreads(); + if (hipThreadIdx_x == 0 && hipThreadIdx_y == 0) + hip_lock_arrays.scratch[threadid] = 0; + } + } + + inline void execute() const { + HIP_SAFE_CALL(hipMalloc( + &hip_lock_arrays.atomic, + sizeof(std::int32_t) * (KOKKOS_IMPL_HIP_SPACE_ATOMIC_MASK + 1))); + HIP_SAFE_CALL(hipMalloc( + &hip_lock_arrays.scratch, + sizeof(std::int32_t) * (::Kokkos::Experimental::HIP::concurrency()))); + HIP_SAFE_CALL(hipMemset( + hip_lock_arrays.scratch, 0, + sizeof(std::int32_t) * (::Kokkos::Experimental::HIP::concurrency()))); + hip_lock_arrays.n = ::Kokkos::Experimental::HIP::concurrency(); + + int64_t const shmem_size_total = m_shmem_begin + m_shmem_size; + dim3 const grid(static_cast(m_league_size), 1, 1); + dim3 const block(static_cast(m_vector_size), + static_cast(m_team_size), 1); + + ::Kokkos::Experimental::Impl::HIPParallelLaunch( + *this, grid, block, shmem_size_total, + m_policy.space().impl_internal_space_instance(), + true); // copy to device and execute + + if (hip_lock_arrays.atomic) { + HIP_SAFE_CALL(hipFree(hip_lock_arrays.atomic)); + hip_lock_arrays.atomic = nullptr; + } + if (hip_lock_arrays.scratch) { + HIP_SAFE_CALL(hipFree(hip_lock_arrays.scratch)); + hip_lock_arrays.scratch = nullptr; + } + hip_lock_arrays.n = 0; + } + + ParallelFor(FunctorType const& arg_functor, Policy const& arg_policy) + : m_functor(arg_functor), + m_policy(arg_policy), + m_league_size(arg_policy.league_size()), + m_team_size(arg_policy.team_size()), + m_vector_size(arg_policy.vector_length()) { + hipFuncAttributes attr = ::Kokkos::Experimental::Impl::HIPParallelLaunch< + ParallelFor, LaunchBounds>::get_hip_func_attributes(); + m_team_size = + m_team_size >= 0 + ? m_team_size + : ::Kokkos::Experimental::Impl::hip_get_opt_block_size< + FunctorType, LaunchBounds>( + m_policy.space().impl_internal_space_instance(), attr, + m_functor, m_vector_size, m_policy.team_scratch_size(0), + m_policy.thread_scratch_size(0)) / + m_vector_size; + + m_shmem_begin = (sizeof(double) * (m_team_size + 2)); + m_shmem_size = + (m_policy.scratch_size(0, m_team_size) + + FunctorTeamShmemSize::value(m_functor, m_team_size)); + m_scratch_size[0] = m_policy.scratch_size(0, m_team_size); + m_scratch_size[1] = m_policy.scratch_size(1, m_team_size); + + // Functor's reduce memory, team scan memory, and team shared memory depend + // upon team size. + m_scratch_ptr[0] = nullptr; + m_scratch_ptr[1] = + m_team_size <= 0 + ? nullptr + : ::Kokkos::Experimental::Impl::hip_resize_scratch_space( + static_cast(m_scratch_size[1]) * + static_cast( + ::Kokkos::Experimental::HIP::concurrency() / + (m_team_size * m_vector_size))); + + int const shmem_size_total = m_shmem_begin + m_shmem_size; + if (m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock < + shmem_size_total) { + printf( + "%i %i\n", + m_policy.space().impl_internal_space_instance()->m_maxShmemPerBlock, + shmem_size_total); + Kokkos::Impl::throw_runtime_exception(std::string( + "Kokkos::Impl::ParallelFor< HIP > insufficient shared memory")); + } + + if (static_cast(m_team_size) > + static_cast( + ::Kokkos::Experimental::Impl::hip_get_max_block_size( + m_policy.space().impl_internal_space_instance(), attr, + arg_functor, arg_policy.vector_length(), + arg_policy.team_scratch_size(0), + arg_policy.thread_scratch_size(0)) / + arg_policy.vector_length())) { + Kokkos::Impl::throw_runtime_exception(std::string( + "Kokkos::Impl::ParallelFor< HIP > requested too large team size.")); + } + } +}; +} // namespace Impl +} // namespace Kokkos + +#endif + +#endif diff --git a/core/src/HIP/Kokkos_HIP_ReduceScan.hpp b/core/src/HIP/Kokkos_HIP_ReduceScan.hpp new file mode 100644 index 00000000000..362128c4119 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_ReduceScan.hpp @@ -0,0 +1,420 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIP_REDUCESCAN_HPP +#define KOKKOS_HIP_REDUCESCAN_HPP + +#include + +#if defined(__HIPCC__) + +namespace Kokkos { +namespace Impl { +template +struct HIPReductionsFunctor; + +template +struct HIPReductionsFunctor { + using ValueTraits = FunctorValueTraits; + using ValueJoin = FunctorValueJoin; + using ValueInit = FunctorValueInit; + using ValueOps = FunctorValueOps; + using pointer_type = typename ValueTraits::pointer_type; + using Scalar = typename ValueTraits::value_type; + + __device__ static inline void scalar_intra_warp_reduction( + FunctorType const& functor, + Scalar* value, // Contribution + bool const skip_vector, // Skip threads if Kokkos vector lanes are not + // part of the reduction + int const width) // How much of the warp participates + { + int const lane_id = (hipThreadIdx_y * hipBlockDim_x + hipThreadIdx_x) % + ::Kokkos::Experimental::Impl::HIPTraits::WarpSize; + for (int delta = skip_vector ? hipBlockDim_x : 1; delta < width; + delta *= 2) { + if (lane_id + delta < ::Kokkos::Experimental::Impl::HIPTraits::WarpSize) { + ValueJoin::join(functor, value, value + delta); + } + } + *value = *(value - lane_id); + } + + __device__ static inline void scalar_intra_block_reduction( + FunctorType const& functor, Scalar value, bool const skip, Scalar* result, + int const /*shared_elements*/, Scalar* shared_team_buffer_element) { + int const warp_id = (hipThreadIdx_y * hipBlockDim_x) / + ::Kokkos::Experimental::Impl::HIPTraits::WarpSize; + Scalar* const my_shared_team_buffer_element = + shared_team_buffer_element + hipThreadIdx_y * hipBlockDim_x + + hipThreadIdx_x; + *my_shared_team_buffer_element = value; + // Warp Level Reduction, ignoring Kokkos vector entries + scalar_intra_warp_reduction( + functor, my_shared_team_buffer_element, skip, + ::Kokkos::Experimental::Impl::HIPTraits::WarpSize); + // Wait for every warp to be done before using one warp to do final cross + // warp reduction + __syncthreads(); + + if (warp_id == 0) { + const unsigned int delta = + (hipThreadIdx_y * hipBlockDim_x + hipThreadIdx_x) * + ::Kokkos::Experimental::Impl::HIPTraits::WarpSize; + if (delta < hipBlockDim_x * hipBlockDim_y) + *my_shared_team_buffer_element = shared_team_buffer_element[delta]; + scalar_intra_warp_reduction( + functor, my_shared_team_buffer_element, false, + hipBlockDim_x * hipBlockDim_y / + ::Kokkos::Experimental::Impl::HIPTraits::WarpSize); + if (hipThreadIdx_x + hipThreadIdx_y == 0) + *result = *shared_team_buffer_element; + } + } + + __device__ static inline bool scalar_inter_block_reduction( + FunctorType const& functor, + ::Kokkos::Experimental::HIP::size_type const /*block_id*/, + ::Kokkos::Experimental::HIP::size_type const block_count, + ::Kokkos::Experimental::HIP::size_type* const shared_data, + ::Kokkos::Experimental::HIP::size_type* const global_data, + ::Kokkos::Experimental::HIP::size_type* const global_flags) { + Scalar* const global_team_buffer_element = + reinterpret_cast(global_data); + Scalar* const my_global_team_buffer_element = + global_team_buffer_element + hipBlockIdx_x; + Scalar* shared_team_buffer_elements = + reinterpret_cast(shared_data); + Scalar value = shared_team_buffer_elements[hipThreadIdx_y]; + int shared_elements = (hipBlockDim_x * hipBlockDim_y) / + ::Kokkos::Experimental::Impl::HIPTraits::WarpSize; + int global_elements = block_count; + __syncthreads(); + + // Do the scalar reduction inside each block + scalar_intra_block_reduction(functor, value, true, + my_global_team_buffer_element, shared_elements, + shared_team_buffer_elements); + __syncthreads(); + + // Use the last block that is done to do the do the reduction across the + // block + __shared__ unsigned int num_teams_done; + if (hipThreadIdx_x + hipThreadIdx_y == 0) { + __threadfence(); + num_teams_done = Kokkos::atomic_fetch_add(global_flags, 1) + 1; + } + bool is_last_block = false; + // FIXME_HIP HIP does not support syncthreads_or. That's why we need to make + // num_teams_done __shared__ + // if (__syncthreads_or(num_teams_done == hipGridDim_x)) {*/ + __syncthreads(); + if (num_teams_done == hipGridDim_x) { + is_last_block = true; + *global_flags = 0; + ValueInit::init(functor, &value); + for (int i = hipThreadIdx_y * hipBlockDim_x + hipThreadIdx_x; + i < global_elements; i += hipBlockDim_x * hipBlockDim_y) { + ValueJoin::join(functor, &value, &global_team_buffer_element[i]); + } + scalar_intra_block_reduction( + functor, value, false, + shared_team_buffer_elements + (hipBlockDim_y - 1), shared_elements, + shared_team_buffer_elements); + } + + return is_last_block; + } +}; + +//---------------------------------------------------------------------------- +/* + * Algorithmic constraints: + * (a) hipBlockDim_y is a power of two + * (b) hipBlockDim_y <= 1024 + * (c) hipBlockDim_x == hipBlockDim_z == 1 + */ + +template +__device__ void hip_intra_block_reduce_scan( + FunctorType const& functor, + typename FunctorValueTraits::pointer_type const + base_data) { + using ValueTraits = FunctorValueTraits; + using ValueJoin = FunctorValueJoin; + + using pointer_type = typename ValueTraits::pointer_type; + + unsigned int const value_count = ValueTraits::value_count(functor); + unsigned int const BlockSizeMask = hipBlockDim_y - 1; + int const WarpMask = Experimental::Impl::HIPTraits::WarpSize - 1; + + // Must have power of two thread count + if ((hipBlockDim_y - 1) & hipBlockDim_y) { + Kokkos::abort( + "HIP::hip_intra_block_reduce_scan requires power-of-two " + "hipBlockDim_y\n"); + } + + auto block_reduce_step = + [&functor, value_count](int const R, pointer_type const TD, int const S) { + if (R > ((1 << S) - 1)) { + ValueJoin::join(functor, TD, (TD - (value_count << S))); + } + }; + + { // Intra-warp reduction: + const unsigned rtid_intra = hipThreadIdx_y & WarpMask; + const pointer_type tdata_intra = base_data + value_count * hipThreadIdx_y; + + block_reduce_step(rtid_intra, tdata_intra, 0); + block_reduce_step(rtid_intra, tdata_intra, 1); + block_reduce_step(rtid_intra, tdata_intra, 2); + block_reduce_step(rtid_intra, tdata_intra, 3); + block_reduce_step(rtid_intra, tdata_intra, 4); + block_reduce_step(rtid_intra, tdata_intra, 5); + } + + __syncthreads(); // Wait for all warps to reduce + + { // Inter-warp reduce-scan by a single warp to avoid extra synchronizations + unsigned int const rtid_inter = + ((hipThreadIdx_y + 1) + << Experimental::Impl::HIPTraits::WarpIndexShift) - + 1; + + if (rtid_inter < hipBlockDim_y) { + pointer_type const tdata_inter = base_data + value_count * rtid_inter; + + if ((1 << 6) < BlockSizeMask) { + block_reduce_step(rtid_inter, tdata_inter, 6); + } + if ((1 << 7) < BlockSizeMask) { + block_reduce_step(rtid_inter, tdata_inter, 7); + } + if ((1 << 8) < BlockSizeMask) { + block_reduce_step(rtid_inter, tdata_inter, 8); + } + if ((1 << 9) < BlockSizeMask) { + block_reduce_step(rtid_inter, tdata_inter, 9); + } + if ((1 << 10) < BlockSizeMask) { + block_reduce_step(rtid_inter, tdata_inter, 10); + } + } + } + + __syncthreads(); // Wait for inter-warp reduce-scan to complete + + if (DoScan) { + // Update all the values for the respective warps (except for the last one) + // by adding from the last value of the previous warp. + if (hipThreadIdx_y >= Experimental::Impl::HIPTraits::WarpSize && + (hipThreadIdx_y & WarpMask) != + Experimental::Impl::HIPTraits::WarpSize - 1) { + const int offset_to_previous_warp_total = + (hipThreadIdx_y & (~WarpMask)) - 1; + ValueJoin::join(functor, base_data + value_count * hipThreadIdx_y, + base_data + value_count * offset_to_previous_warp_total); + } + } +} + +//---------------------------------------------------------------------------- +/**\brief Input value-per-thread starting at 'shared_data'. + * Reduction value at last thread's location. + * + * If 'DoScan' then write blocks' scan values and block-groups' scan values. + * + * Global reduce result is in the last threads' 'shared_data' location. + */ + +template +__device__ bool hip_single_inter_block_reduce_scan2( + FunctorType const& functor, + ::Kokkos::Experimental::HIP::size_type const block_id, + ::Kokkos::Experimental::HIP::size_type const block_count, + ::Kokkos::Experimental::HIP::size_type* const shared_data, + ::Kokkos::Experimental::HIP::size_type* const global_data, + ::Kokkos::Experimental::HIP::size_type* const global_flags) { + using size_type = ::Kokkos::Experimental::HIP::size_type; + using ValueTraits = FunctorValueTraits; + using ValueJoin = FunctorValueJoin; + using ValueInit = FunctorValueInit; + using ValueOps = FunctorValueOps; + + using pointer_type = typename ValueTraits::pointer_type; + + // '__ffs' = position of the least significant bit set to 1. + // 'hipBlockDim_y' is guaranteed to be a power of two so this + // is the integral shift value that can replace an integral divide. + unsigned int const BlockSizeShift = __ffs(hipBlockDim_y) - 1; + unsigned int const BlockSizeMask = hipBlockDim_y - 1; + + // Must have power of two thread count + if (BlockSizeMask & hipBlockDim_y) { + Kokkos::abort( + "HIP::hip_single_inter_block_reduce_scan requires power-of-two " + "blockDim"); + } + + integral_nonzero_constant const + word_count(ValueTraits::value_size(functor) / sizeof(size_type)); + + // Reduce the accumulation for the entire block. + hip_intra_block_reduce_scan( + functor, pointer_type(shared_data)); + + { + // Write accumulation total to global scratch space. + // Accumulation total is the last thread's data. + size_type* const shared = shared_data + word_count.value * BlockSizeMask; + size_type* const global = global_data + word_count.value * block_id; + + for (size_t i = hipThreadIdx_y; i < word_count.value; i += hipBlockDim_y) { + global[i] = shared[i]; + } + } + + // Contributing blocks note that their contribution has been completed via an + // atomic-increment flag If this block is not the last block to contribute to + // this group then the block is done. + // FIXME_HIP __syncthreads_or is not supported by HIP yet. + // const bool is_last_block = !__syncthreads_or( + // threadIdx.y + // ? 0 + // : (1 + atomicInc(global_flags, block_count - 1) < block_count)); + __shared__ int n_done; + n_done = 0; + __syncthreads(); + if (hipThreadIdx_y == 0) { + __threadfence(); + n_done = 1 + atomicInc(global_flags, block_count - 1); + } + __syncthreads(); + bool const is_last_block = (n_done == static_cast(block_count)); + + if (is_last_block) { + size_type const b = (static_cast(block_count) * + static_cast(hipThreadIdx_y)) >> + BlockSizeShift; + size_type const e = (static_cast(block_count) * + static_cast(hipThreadIdx_y + 1)) >> + BlockSizeShift; + + { + void* const shared_ptr = shared_data + word_count.value * hipThreadIdx_y; + /* reference_type shared_value = */ ValueInit::init(functor, shared_ptr); + + for (size_type i = b; i < e; ++i) { + ValueJoin::join(functor, shared_ptr, + global_data + word_count.value * i); + } + } + + hip_intra_block_reduce_scan( + functor, pointer_type(shared_data)); + + if (DoScan) { + size_type* const shared_value = + shared_data + word_count.value * (hipThreadIdx_y ? hipThreadIdx_y - 1 + : hipBlockDim_y); + + if (!hipThreadIdx_y) { + ValueInit::init(functor, shared_value); + } + + // Join previous inclusive scan value to each member + for (size_type i = b; i < e; ++i) { + size_type* const global_value = global_data + word_count.value * i; + ValueJoin::join(functor, shared_value, global_value); + ValueOps::copy(functor, global_value, shared_value); + } + } + } + + return is_last_block; +} + +template +__device__ bool hip_single_inter_block_reduce_scan( + FunctorType const& functor, + ::Kokkos::Experimental::HIP::size_type const block_id, + ::Kokkos::Experimental::HIP::size_type const block_count, + ::Kokkos::Experimental::HIP::size_type* const shared_data, + ::Kokkos::Experimental::HIP::size_type* const global_data, + ::Kokkos::Experimental::HIP::size_type* const global_flags) { + using ValueTraits = FunctorValueTraits; + if (!DoScan && /*FIXME*/ (bool)ValueTraits::StaticValueSize) + // FIXME_HIP For now we don't use shuffle + // return Kokkos::Impl::HIPReductionsFunctor< + // FunctorType, ArgTag, false, (ValueTraits::StaticValueSize > 16)>:: + // scalar_inter_block_reduction(functor, block_id, block_count, + // shared_data, global_data, global_flags); + return Kokkos::Impl::HIPReductionsFunctor< + FunctorType, ArgTag, false, + false>::scalar_inter_block_reduction(functor, block_id, block_count, + shared_data, global_data, + global_flags); + else { + return hip_single_inter_block_reduce_scan2( + functor, block_id, block_count, shared_data, global_data, global_flags); + } +} + +// Size in bytes required for inter block reduce or scan +template +inline unsigned hip_single_inter_block_reduce_scan_shmem( + const FunctorType& functor, const unsigned BlockSize) { + return (BlockSize + 2) * + Impl::FunctorValueTraits::value_size(functor); +} + +} // namespace Impl +} // namespace Kokkos + +#endif + +#endif diff --git a/core/src/HIP/Kokkos_HIP_Space.cpp b/core/src/HIP/Kokkos_HIP_Space.cpp new file mode 100644 index 00000000000..2dca7f13c95 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Space.cpp @@ -0,0 +1,636 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ +namespace Kokkos { +namespace Impl { + +namespace { +hipStream_t get_deep_copy_stream() { + static hipStream_t s = 0; + if (s == 0) { + HIP_SAFE_CALL(hipStreamCreate(&s)); + } + return s; +} +} // namespace + +DeepCopy::DeepCopy(void* dst, const void* src, + size_t n) { + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +DeepCopy::DeepCopy(void* dst, const void* src, + size_t n) { + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +DeepCopy::DeepCopy(void* dst, const void* src, + size_t n) { + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +DeepCopy::DeepCopy(const Kokkos::Experimental::HIP& + /*instance*/, + void* dst, const void* src, + size_t n) { + // FIXME_HIP use instance + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +DeepCopy:: + DeepCopy(const Kokkos::Experimental::HIP& /*instance*/, void* dst, + const void* src, size_t n) { + // FIXME_HIP use instance + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +DeepCopy:: + DeepCopy(const Kokkos::Experimental::HIP& /*instance*/, void* dst, + const void* src, size_t n) { + // FIXME_HIP use instance + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +DeepCopy::DeepCopy(void* dst, const void* src, + size_t n) { + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +DeepCopy::DeepCopy(void* dst, const void* src, + size_t n) { + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +DeepCopy::DeepCopy(void* dst, const void* src, + size_t n) { + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +DeepCopy:: + DeepCopy(const Kokkos::Experimental::HIP& /*instance*/, void* dst, + const void* src, size_t n) { + // FIXME_HIP use instance + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +DeepCopy::DeepCopy(const Kokkos::Experimental::HIP& + /*instance*/, + void* dst, const void* src, + size_t n) { + // FIXME_HIP use instance + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +DeepCopy::DeepCopy(const Kokkos::Experimental::HIP& + /*instance*/, + void* dst, const void* src, + size_t n) { + // FIXME_HIP use instance + HIP_SAFE_CALL(hipMemcpy(dst, src, n, hipMemcpyDefault)); +} + +void DeepCopyAsyncHIP(void* dst, void const* src, size_t n) { + hipStream_t s = get_deep_copy_stream(); + HIP_SAFE_CALL(hipMemcpyAsync(dst, src, n, hipMemcpyDefault, s)); + hipStreamSynchronize(s); +} + +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { + +void Experimental::HIPSpace::access_error() { + const std::string msg( + "Kokkos::Experimental::HIPSpace::access_error attempt to execute " + "Experimental::HIP function from non-HIP space"); + Kokkos::Impl::throw_runtime_exception(msg); +} + +void Experimental::HIPSpace::access_error(const void* const) { + const std::string msg( + "Kokkos::Experimental::HIPSpace::access_error attempt to execute " + "Experimental::HIP function from non-HIP space"); + Kokkos::Impl::throw_runtime_exception(msg); +} + +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Experimental { + +HIPSpace::HIPSpace() : m_device(HIP().hip_device()) {} + +HIPHostPinnedSpace::HIPHostPinnedSpace() {} + +void* HIPSpace::allocate(const size_t arg_alloc_size) const { + void* ptr = nullptr; + + auto const error_code = hipMalloc(&ptr, arg_alloc_size); + if (error_code != hipSuccess) { + hipGetLastError(); // This is the only way to clear the last error, which + // we should do here since we're turning it into an + // exception here + throw HIPRawMemoryAllocationFailure( + arg_alloc_size, error_code, + RawMemoryAllocationFailure::AllocationMechanism::HIPMalloc); + } + + return ptr; +} + +void* HIPHostPinnedSpace::allocate(const size_t arg_alloc_size) const { + void* ptr = nullptr; + + auto const error_code = hipHostMalloc(&ptr, arg_alloc_size); + if (error_code != hipSuccess) { + hipGetLastError(); // This is the only way to clear the last error, which + // we should do here since we're turning it into an + // exception here + throw HIPRawMemoryAllocationFailure( + arg_alloc_size, error_code, + RawMemoryAllocationFailure::AllocationMechanism::HIPHostMalloc); + } + + return ptr; +} + +void HIPSpace::deallocate(void* const arg_alloc_ptr, + const size_t /* arg_alloc_size */) const { + HIP_SAFE_CALL(hipFree(arg_alloc_ptr)); +} + +void HIPHostPinnedSpace::deallocate(void* const arg_alloc_ptr, + const size_t /* arg_alloc_size */) const { + HIP_SAFE_CALL(hipHostFree(arg_alloc_ptr)); +} + +} // namespace Experimental +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +#ifdef KOKKOS_DEBUG +SharedAllocationRecord + SharedAllocationRecord::s_root_record; + +SharedAllocationRecord SharedAllocationRecord< + Kokkos::Experimental::HIPHostPinnedSpace, void>::s_root_record; +#endif + +std::string SharedAllocationRecord::get_label() const { + SharedAllocationHeader header; + + Kokkos::Impl::DeepCopy( + &header, RecordBase::head(), sizeof(SharedAllocationHeader)); + + return std::string(header.m_label); +} + +std::string SharedAllocationRecord::get_label() const { + return std::string(RecordBase::head()->m_label); +} + +SharedAllocationRecord* +SharedAllocationRecord::allocate( + const Kokkos::Experimental::HIPSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size) { + return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); +} + +SharedAllocationRecord* +SharedAllocationRecord:: + allocate(const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size) { + return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); +} + +void SharedAllocationRecord::deallocate( + SharedAllocationRecord* arg_rec) { + delete static_cast(arg_rec); +} + +void SharedAllocationRecord:: + deallocate(SharedAllocationRecord* arg_rec) { + delete static_cast(arg_rec); +} + +SharedAllocationRecord::~SharedAllocationRecord() { +#if defined(KOKKOS_ENABLE_PROFILING) + if (Kokkos::Profiling::profileLibraryLoaded()) { + SharedAllocationHeader header; + Kokkos::Impl::DeepCopy( + &header, RecordBase::m_alloc_ptr, sizeof(SharedAllocationHeader)); + + Kokkos::Profiling::deallocateData( + Kokkos::Profiling::SpaceHandle(Kokkos::Experimental::HIPSpace::name()), + header.m_label, data(), size()); + } +#endif + + m_space.deallocate(SharedAllocationRecord::m_alloc_ptr, + SharedAllocationRecord::m_alloc_size); +} + +SharedAllocationRecord::~SharedAllocationRecord() { +#if defined(KOKKOS_ENABLE_PROFILING) + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::deallocateData( + Kokkos::Profiling::SpaceHandle( + Kokkos::Experimental::HIPHostPinnedSpace::name()), + RecordBase::m_alloc_ptr->m_label, data(), size()); + } +#endif + + m_space.deallocate(SharedAllocationRecord::m_alloc_ptr, + SharedAllocationRecord::m_alloc_size); +} + +SharedAllocationRecord:: + SharedAllocationRecord( + const Kokkos::Experimental::HIPSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const SharedAllocationRecord::function_type arg_dealloc) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : SharedAllocationRecord( +#ifdef KOKKOS_DEBUG + &SharedAllocationRecord::s_root_record, +#endif + Kokkos::Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc), + m_space(arg_space) { +#if defined(KOKKOS_ENABLE_PROFILING) + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::allocateData( + Kokkos::Profiling::SpaceHandle(arg_space.name()), arg_label, data(), + arg_alloc_size); + } +#endif + + SharedAllocationHeader header; + + // Fill in the Header information + header.m_record = static_cast*>(this); + + strncpy(header.m_label, arg_label.c_str(), + SharedAllocationHeader::maximum_label_length); + // Set last element zero, in case c_str is too long + header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; + + // Copy to device memory + Kokkos::Impl::DeepCopy( + RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader)); +} + +SharedAllocationRecord:: + SharedAllocationRecord( + const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const SharedAllocationRecord::function_type arg_dealloc) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : SharedAllocationRecord( +#ifdef KOKKOS_DEBUG + &SharedAllocationRecord::s_root_record, +#endif + Kokkos::Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc), + m_space(arg_space) { +#if defined(KOKKOS_ENABLE_PROFILING) + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::allocateData( + Kokkos::Profiling::SpaceHandle(arg_space.name()), arg_label, data(), + arg_alloc_size); + } +#endif + // Fill in the Header information, directly accessible via host pinned memory + + RecordBase::m_alloc_ptr->m_record = this; + + strncpy(RecordBase::m_alloc_ptr->m_label, arg_label.c_str(), + SharedAllocationHeader::maximum_label_length); + // Set last element zero, in case c_str is too long + RecordBase::m_alloc_ptr + ->m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; +} + +//---------------------------------------------------------------------------- + +void* SharedAllocationRecord:: + allocate_tracked(const Kokkos::Experimental::HIPSpace& arg_space, + const std::string& arg_alloc_label, + const size_t arg_alloc_size) { + if (!arg_alloc_size) return (void*)0; + + SharedAllocationRecord* const r = + allocate(arg_space, arg_alloc_label, arg_alloc_size); + + RecordBase::increment(r); + + return r->data(); +} + +void SharedAllocationRecord::deallocate_tracked(void* const + arg_alloc_ptr) { + if (arg_alloc_ptr != 0) { + SharedAllocationRecord* const r = get_record(arg_alloc_ptr); + + RecordBase::decrement(r); + } +} + +void* SharedAllocationRecord:: + reallocate_tracked(void* const arg_alloc_ptr, const size_t arg_alloc_size) { + SharedAllocationRecord* const r_old = get_record(arg_alloc_ptr); + SharedAllocationRecord* const r_new = + allocate(r_old->m_space, r_old->get_label(), arg_alloc_size); + + Kokkos::Impl::DeepCopy( + r_new->data(), r_old->data(), std::min(r_old->size(), r_new->size())); + + RecordBase::increment(r_new); + RecordBase::decrement(r_old); + + return r_new->data(); +} + +//---------------------------------------------------------------------------- + +SharedAllocationRecord* +SharedAllocationRecord::get_record( + void* alloc_ptr) { + using Header = SharedAllocationHeader; + using RecordHIP = + SharedAllocationRecord; + + // Copy the header from the allocation + Header head; + + Header const* const head_hip = + alloc_ptr ? Header::get_header(alloc_ptr) : (Header*)0; + + if (alloc_ptr) { + Kokkos::Impl::DeepCopy( + &head, head_hip, sizeof(SharedAllocationHeader)); + } + + RecordHIP* const record = + alloc_ptr ? static_cast(head.m_record) : (RecordHIP*)0; + + if (!alloc_ptr || record->m_alloc_ptr != head_hip) { + Kokkos::Impl::throw_runtime_exception(std::string( + "Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HIPSpace " + ", void >::get_record ERROR")); + } + + return record; +} + +// Iterate records to print orphaned memory ... +void SharedAllocationRecord:: + print_records(std::ostream& s, const Kokkos::Experimental::HIPSpace& space, + bool detail) { +#ifdef KOKKOS_DEBUG + SharedAllocationRecord* r = &s_root_record; + + char buffer[256]; + + SharedAllocationHeader head; + + if (detail) { + do { + if (r->m_alloc_ptr) { + Kokkos::Impl::DeepCopy( + &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader)); + } else { + head.m_label[0] = 0; + } + + // Formatting dependent on sizeof(uintptr_t) + const char* format_string; + + if (sizeof(uintptr_t) == sizeof(unsigned long)) { + format_string = + "HIP addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + " + "%.8ld ] count(%d) dealloc(0x%.12lx) %s\n"; + } else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { + format_string = + "HIP addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ " + "0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n"; + } + + snprintf(buffer, 256, format_string, reinterpret_cast(r), + reinterpret_cast(r->m_prev), + reinterpret_cast(r->m_next), + reinterpret_cast(r->m_alloc_ptr), r->m_alloc_size, + r->m_count, reinterpret_cast(r->m_dealloc), + head.m_label); + std::cout << buffer; + r = r->m_next; + } while (r != &s_root_record); + } else { + do { + if (r->m_alloc_ptr) { + Kokkos::Impl::DeepCopy( + &head, r->m_alloc_ptr, sizeof(SharedAllocationHeader)); + + // Formatting dependent on sizeof(uintptr_t) + const char* format_string; + + if (sizeof(uintptr_t) == sizeof(unsigned long)) { + format_string = "HIP [ 0x%.12lx + %ld ] %s\n"; + } else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { + format_string = "HIP [ 0x%.12llx + %ld ] %s\n"; + } + + snprintf(buffer, 256, format_string, + reinterpret_cast(r->data()), r->size(), + head.m_label); + } else { + snprintf(buffer, 256, "HIP [ 0 + 0 ]\n"); + } + std::cout << buffer; + r = r->m_next; + } while (r != &s_root_record); + } +#else + (void)s; + (void)space; + (void)detail; + throw_runtime_exception( + "Kokkos::Impl::SharedAllocationRecord::print_records" + " only works with KOKKOS_DEBUG enabled"); +#endif +} + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +void* hip_resize_scratch_space(size_t bytes, bool force_shrink) { + static void* ptr = NULL; + static size_t current_size = 0; + if (current_size == 0) { + current_size = bytes; + ptr = Kokkos::kokkos_malloc( + "HIPSpace::ScratchMemory", current_size); + } + if (bytes > current_size) { + current_size = bytes; + ptr = Kokkos::kokkos_realloc(ptr, + current_size); + } + if ((bytes < current_size) && (force_shrink)) { + current_size = bytes; + Kokkos::kokkos_free(ptr); + ptr = Kokkos::kokkos_malloc( + "HIPSpace::ScratchMemory", current_size); + } + return ptr; +} + +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ +namespace Kokkos { +namespace Experimental { + +// HIP::size_type HIP::detect_device_count() +//{ return Impl::HIPInternalDevices::singleton().m_hipDevCount ; } + +int HIP::concurrency() { + // FIXME_HIP + // MI60: ThreadsPerComputeUnit*ComputeUnits/ShaderEngine*ShaderEngines) + return 2536 * 16 * 4; +} +int HIP::impl_is_initialized() { + return Impl::HIPInternal::singleton().is_initialized(); +} + +void HIP::impl_initialize(const HIP::SelectDevice config) { + Impl::HIPInternal::singleton().initialize(config.hip_device_id); + +#if defined(KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::initialize(); +#endif +} + +void HIP::impl_finalize() { + Impl::HIPInternal::singleton().finalize(); + +#if defined(KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::finalize(); +#endif +} + +HIP::HIP() : m_space_instance(&Impl::HIPInternal::singleton()) { + Impl::HIPInternal::singleton().verify_is_initialized( + "HIP instance constructor"); +} + +// HIP::HIP( const int instance_id ) +// : m_device( Impl::HIPInternal::singleton().m_hipDev ) +//{} + +void HIP::print_configuration(std::ostream& s, const bool) { + Impl::HIPInternal::singleton().print_configuration(s); +} + +void HIP::fence() const { HIP_SAFE_CALL(hipDeviceSynchronize()); } + +int HIP::hip_device() const { return impl_internal_space_instance()->m_hipDev; } +const char* HIP::name() { return "HIP"; } + +} // namespace Experimental +} // namespace Kokkos diff --git a/core/src/HIP/Kokkos_HIP_Team.hpp b/core/src/HIP/Kokkos_HIP_Team.hpp new file mode 100644 index 00000000000..b3c4f4609b4 --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Team.hpp @@ -0,0 +1,1106 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIP_TEAM_HPP +#define KOKKOS_HIP_TEAM_HPP + +#include + +#if defined(__HIPCC__) + +#include +#include + +#include +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template +struct HIPJoinFunctor { + typedef Type value_type; + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& update, + volatile const value_type& input) { + update += input; + } +}; + +/**\brief Team member_type passed to TeamPolicy or TeamTask closures. + * + * HIP thread blocks for team closures are dimensioned as: + * hipBlockDim_x == number of "vector lanes" per "thread" + * hipBlockDim_y == number of "threads" per team + * hipBlockDim_z == number of teams in a block + * where + * A set of teams exactly fill a warp OR a team is the whole block + * ( 0 == WarpSize % ( hipBlockDim_x * hipBlockDim_y ) ) + * OR + * ( 1 == hipBlockDim_z ) + + * Thus when 1 < hipBlockDim_z the team is warp-synchronous + * and __syncthreads should not be called in team collectives. + * + * When multiple teams are mapped onto a single block then the + * total available shared memory must be partitioned among teams. + */ +class HIPTeamMember { + public: + using execution_space = Kokkos::Experimental::HIP; + using scratch_memory_space = execution_space::scratch_memory_space; + + private: + mutable void* m_team_reduce; + scratch_memory_space m_team_shared; + int m_team_reduce_size; + int m_league_rank; + int m_league_size; + + public: + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space& team_shmem() const { + return m_team_shared.set_team_thread_mode(0, 1, 0); + } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space& team_scratch( + const int& level) const { + return m_team_shared.set_team_thread_mode(level, 1, 0); + } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space& thread_scratch( + const int& level) const { + return m_team_shared.set_team_thread_mode(level, team_size(), team_rank()); + } + + KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank; } + KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size; } + KOKKOS_INLINE_FUNCTION int team_rank() const { +#ifdef __HIP_DEVICE_COMPILE__ + return hipThreadIdx_y; +#else + return 0; +#endif + } + + KOKKOS_INLINE_FUNCTION int team_size() const { +#ifdef __HIP_DEVICE_COMPILE__ + return hipBlockDim_y; +#else + return 1; +#endif + } + + KOKKOS_INLINE_FUNCTION void team_barrier() const { +#ifdef __HIP_DEVICE_COMPILE__ + if (1 == hipBlockDim_z) + __syncthreads(); // team == block + else + __threadfence_block(); // team <= warp +#endif + } + + //-------------------------------------------------------------------------- + + template + KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType& val, + const int& thread_id) const { +#ifdef __HIP_DEVICE_COMPILE__ + if (1 == hipBlockDim_z) { // team == block + __syncthreads(); + // Wait for shared data write until all threads arrive here + if (hipThreadIdx_x == 0u && + hipThreadIdx_y == static_cast(thread_id)) { + *(reinterpret_cast(m_team_reduce)) = val; + } + __syncthreads(); // Wait for shared data read until root thread writes + val = *(reinterpret_cast(m_team_reduce)); + } else { // team <= warp + ValueType tmp(val); // input might not be a register variable + ::Kokkos::Experimental::Impl::in_place_shfl( + val, tmp, hipBlockDim_x * thread_id, hipBlockDim_x * hipBlockDim_y); + } +#else + (void)val; + (void)thread_id; +#endif + } + + template + KOKKOS_INLINE_FUNCTION void team_broadcast(Closure const& f, ValueType& val, + const int& thread_id) const { +#ifdef __HIP_DEVICE_COMPILE__ + f(val); + + if (1 == hipBlockDim_z) { // team == block + __syncthreads(); + // Wait for shared data write until all threads arrive here + if (hipThreadIdx_x == 0u && + hipThreadIdx_y == static_cast(thread_id)) { + *(reinterpret_cast(m_team_reduce)) = val; + } + __syncthreads(); // Wait for shared data read until root thread writes + val = *(reinterpret_cast(m_team_reduce)); + } else { // team <= warp + ValueType tmp(val); // input might not be a register variable + ::Kokkos::Experimental::Impl::in_place_shfl( + val, tmp, hipBlockDim_x * thread_id, hipBlockDim_x * hipBlockDim_y); + } +#else + (void)f; + (void)val; + (void)thread_id; +#endif + } + + //-------------------------------------------------------------------------- + /**\brief Reduction across a team + * + * Mapping of teams onto blocks: + * hipBlockDim_x is "vector lanes" + * hipBlockDim_y is team "threads" + * hipBlockDim_z is number of teams per block + * + * Requires: + * hipBlockDim_x is power two + * hipBlockDim_x <= HIPTraits::WarpSize + * ( 0 == HIPTraits::WarpSize % ( hipBlockDim_x * hipBlockDim_y ) + * OR + * ( 1 == hipBlockDim_z ) + */ + template + KOKKOS_INLINE_FUNCTION + typename std::enable_if::value>::type + team_reduce(ReducerType const& reducer) const noexcept { + team_reduce(reducer, reducer.reference()); + } + + template + KOKKOS_INLINE_FUNCTION + typename std::enable_if::value>::type + team_reduce(ReducerType const& reducer, + typename ReducerType::value_type& value) const noexcept { +#ifdef __HIP_DEVICE_COMPILE__ + hip_intra_block_reduction(reducer, value, hipBlockDim_y); +#else + (void)reducer; + (void)value; +#endif + } + + //-------------------------------------------------------------------------- + /** \brief Intra-team exclusive prefix sum with team_rank() ordering + * with intra-team non-deterministic ordering accumulation. + * + * The global inter-team accumulation value will, at the end of the + * league's parallel execution, be the scan's total. + * Parallel execution ordering of the league's teams is non-deterministic. + * As such the base value for each team's scan operation is similarly + * non-deterministic. + */ + template + KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value, + Type* const global_accum) const { +#ifdef __HIP_DEVICE_COMPILE__ + Type* const base_data = reinterpret_cast(m_team_reduce); + + __syncthreads(); // Don't write in to shared data until all threads have + // entered this function + + if (0 == hipThreadIdx_y) { + base_data[0] = 0; + } + + base_data[hipThreadIdx_y + 1] = value; + + Impl::hip_intra_block_reduce_scan, void>( + Impl::HIPJoinFunctor(), base_data + 1); + + if (global_accum) { + if (hipBlockDim_y == hipThreadIdx_y + 1) { + base_data[hipBlockDim_y] = + atomic_fetch_add(global_accum, base_data[hipBlockDim_y]); + } + __syncthreads(); // Wait for atomic + base_data[hipThreadIdx_y] += base_data[hipBlockDim_y]; + } + + return base_data[hipThreadIdx_y]; +#else + (void)value; + (void)global_accum; + return Type(); +#endif + } + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering. + * + * The highest rank thread can compute the reduction total as + * reduction_total = dev.team_scan( value ) + value ; + */ + template + KOKKOS_INLINE_FUNCTION Type team_scan(const Type& value) const { + return this->template team_scan(value, 0); + } + + //---------------------------------------- + + template + KOKKOS_INLINE_FUNCTION static + typename std::enable_if::value>::type + vector_reduce(ReducerType const& reducer) { + vector_reduce(reducer, reducer.reference()); + } + + template + KOKKOS_INLINE_FUNCTION static + typename std::enable_if::value>::type + vector_reduce(ReducerType const& reducer, + typename ReducerType::value_type& value) { +#ifdef __HIP_DEVICE_COMPILE__ + if (hipBlockDim_x == 1) return; + + // Intra vector lane shuffle reduction: + typename ReducerType::value_type tmp(value); + typename ReducerType::value_type tmp2 = tmp; + + int constexpr warp_size = ::Kokkos::Experimental::Impl::HIPTraits::WarpSize; + unsigned mask = + hipBlockDim_x == warp_size + ? 0xffffffff + : ((1 << hipBlockDim_x) - 1) + << ((hipThreadIdx_y % (warp_size / hipBlockDim_x)) * + hipBlockDim_x); + + for (int i = hipBlockDim_x; (i >>= 1);) { + ::Kokkos::Experimental::Impl::in_place_shfl_down(tmp2, tmp, i, + hipBlockDim_x, mask); + if (static_cast(hipThreadIdx_x) < i) { + reducer.join(tmp, tmp2); + } + } + + // Broadcast from root lane to all other lanes. + // Cannot use "butterfly" algorithm to avoid the broadcast + // because floating point summation is not associative + // and thus different threads could have different results. + + ::Kokkos::Experimental::Impl::in_place_shfl(tmp2, tmp, 0, hipBlockDim_x, + mask); + value = tmp2; + reducer.reference() = tmp2; +#else + (void)reducer; + (void)value; +#endif + } + + //-------------------------------------------------------------------------- + /**\brief Global reduction across all blocks + * + * Return !0 if reducer contains the final value + */ + template + KOKKOS_INLINE_FUNCTION static + typename std::enable_if::value, int>::type + global_reduce(ReducerType const& reducer, int* const global_scratch_flags, + void* const global_scratch_space, void* const shmem, + int const shmem_size) { +#ifdef __HIP_COMPILE_DEVICE__ + + typedef typename ReducerType::value_type value_type; + typedef value_type volatile* pointer_type; + + // Number of shared memory entries for the reduction: + const int nsh = shmem_size / sizeof(value_type); + + // Number of HIP threads in the block, rank within the block + const int nid = hipBlockDim_x * hipBlockDim_y * hipBlockDim_z; + const int tid = + hipThreadIdx_x + + hipBlockDim_x * (hipThreadIdx_y + hipBlockDim_y * hipThreadIdx_z); + + // Reduces within block using all available shared memory + // Contributes if it is the root "vector lane" + + // wn == number of warps in the block + // wx == which lane within the warp + // wy == which warp within the block + + const int wn = + (nid + HIPTraits::WarpIndexMask) >> HIPTraits::WarpIndexShift; + const int wx = tid & HIPTraits::WarpIndexMask; + const int wy = tid >> HIPTraits::WarpIndexShift; + + //------------------------ + { // Intra warp shuffle reduction from contributing CUDA threads + + value_type tmp(reducer.reference()); + + int constexpr warp_size = + ::Kokkos::Experimental::Impl::HIPTraits::WarpSize; + for (int i = warp_size; static_cast(hipBlockDim_x) <= (i >>= 1);) { + Impl::in_place_shfl_down(reducer.reference(), tmp, i, warp_size); + + // Root of each vector lane reduces "thread" contribution + if (0 == hipThreadIdx_x && wx < i) { + reducer.join(&tmp, reducer.data()); + } + } + + // Reduce across warps using shared memory. + // Number of warps may not be power of two. + + __syncthreads(); // Wait before shared data write + + // Number of shared memory entries for the reduction + // is at most one per warp + const int nentry = wn < nsh ? wn : nsh; + + if (0 == wx && wy < nentry) { + // Root thread of warp 'wy' has warp's value to contribute + (reinterpret_cast(shmem))[wy] = tmp; + } + + __syncthreads(); // Wait for write to be visible to block + + // When more warps than shared entries + // then warps must take turns joining their contribution + // to the designated shared memory entry. + for (int i = nentry; i < wn; i += nentry) { + const int k = wy - i; + + if (0 == wx && i <= wy && k < nentry) { + // Root thread of warp 'wy' has warp's value to contribute + reducer.join((reinterpret_cast(shmem)) + k, &tmp); + } + + __syncthreads(); // Wait for write to be visible to block + } + + // One warp performs the inter-warp reduction: + + if (0 == wy) { + // Start fan-in at power of two covering nentry + + for (int i = (1 << (32 - __clz(nentry - 1))); (i >>= 1);) { + const int k = wx + i; + if (wx < i && k < nentry) { + reducer.join((reinterpret_cast(shmem)) + wx, + (reinterpret_cast(shmem)) + k); + __threadfence_block(); // Wait for write to be visible to warp + } + } + } + } + //------------------------ + { // Write block's value to global_scratch_memory + + int last_block = 0; + + if (0 == wx) { + reducer.copy((reinterpret_cast(global_scratch_space)) + + hipBlockIdx_x * reducer.length(), + reducer.data()); + + __threadfence(); // Wait until global write is visible. + + last_block = static_cast(hipGridDim_x) == + 1 + Kokkos::atomic_fetch_add(global_scratch_flags, 1); + + // If last block then reset count + if (last_block) *global_scratch_flags = 0; + } + + // FIXME hip does not support __syncthreads_or so we need to do it by hand + // last_block = __syncthreads_or(last_block); + + __shared__ int last_block_shared; + if (last_block) last_block_shared = last_block; + __threadfence_block(); + + if (!last_block_shared) return 0; + } + //------------------------ + // Last block reads global_scratch_memory into shared memory. + + const int nentry = nid < hipGridDim_x + ? (nid < nsh ? nid : nsh) + : (hipGridDim_x < nsh ? hipGridDim_x : nsh); + + // nentry = min( nid , nsh , gridDim.x ) + + // whole block reads global memory into shared memory: + + if (tid < nentry) { + const int offset = tid * reducer.length(); + + reducer.copy( + (reinterpret_cast(shmem)) + offset, + (reinterpret_cast(global_scratch_space)) + offset); + + for (int i = nentry + tid; i < static_cast(hipGridDim_x); + i += nentry) { + reducer.join((reinterpret_cast(shmem)) + offset, + (reinterpret_cast(global_scratch_space)) + + i * reducer.length()); + } + } + + __syncthreads(); // Wait for writes to be visible to block + + if (0 == wy) { + // Iterate to reduce shared memory to single warp fan-in size + + int constexpr warp_size = + ::Kokkos::Experimental::Impl::HIPTraits::WarpSize; + const int nreduce = warp_size < nentry ? warp_size : nentry; + + if (wx < nreduce && nreduce < nentry) { + for (int i = nreduce + wx; i < nentry; i += nreduce) { + reducer.join(((pointer_type)shmem) + wx, ((pointer_type)shmem) + i); + } + __threadfence_block(); // Wait for writes to be visible to warp + } + + // Start fan-in at power of two covering nentry + + for (int i = (1 << (warp_size - __clz(nreduce - 1))); (i >>= 1);) { + const int k = wx + i; + if (wx < i && k < nreduce) { + reducer.join((reinterpret_cast(shmem)) + wx, + (reinterpret_cast(shmem)) + k); + __threadfence_block(); // Wait for writes to be visible to warp + } + } + + if (0 == wx) { + reducer.copy(reducer.data(), reinterpret_cast(shmem)); + return 1; + } + } + return 0; + +#else + (void)reducer; + (void)global_scratch_flags; + (void)shmem; + (void)global_scratch_space; + (void)shmem_size; + return 0; +#endif + } + + //---------------------------------------- + // Private for the driver + + KOKKOS_INLINE_FUNCTION + HIPTeamMember(void* shared, const int shared_begin, const int shared_size, + void* scratch_level_1_ptr, const int scratch_level_1_size, + const int arg_league_rank, const int arg_league_size) + : m_team_reduce(shared), + m_team_shared(((char*)shared) + shared_begin, shared_size, + scratch_level_1_ptr, scratch_level_1_size), + m_team_reduce_size(shared_begin), + m_league_rank(arg_league_rank), + m_league_size(arg_league_size) {} + + public: + // Declare to avoid unused private member warnings which are trigger + // when SFINAE excludes the member function which uses these variables + // Making another class a friend also surpresses these warnings + bool impl_avoid_sfinae_warning() const noexcept { + return m_team_reduce_size > 0 && m_team_reduce != nullptr; + } +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template +struct TeamThreadRangeBoundariesStruct { + typedef iType index_type; + const HIPTeamMember& member; + const iType start; + const iType end; + + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct(const HIPTeamMember& thread_, iType count) + : member(thread_), start(0), end(count) {} + + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct(const HIPTeamMember& thread_, iType begin_, + iType end_) + : member(thread_), start(begin_), end(end_) {} +}; + +template +struct TeamVectorRangeBoundariesStruct { + typedef iType index_type; + const HIPTeamMember& member; + const iType start; + const iType end; + + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct(const HIPTeamMember& thread_, + const iType& count) + : member(thread_), start(0), end(count) {} + + KOKKOS_INLINE_FUNCTION + TeamVectorRangeBoundariesStruct(const HIPTeamMember& thread_, + const iType& begin_, const iType& end_) + : member(thread_), start(begin_), end(end_) {} +}; + +template +struct ThreadVectorRangeBoundariesStruct { + typedef iType index_type; + const index_type start; + const index_type end; + + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct(const HIPTeamMember, index_type count) + : start(static_cast(0)), end(count) {} + + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct(index_type count) + : start(static_cast(0)), end(count) {} + + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct(const HIPTeamMember, index_type arg_begin, + index_type arg_end) + : start(arg_begin), end(arg_end) {} + + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct(index_type arg_begin, index_type arg_end) + : start(arg_begin), end(arg_end) {} +}; + +} // namespace Impl + +template +KOKKOS_INLINE_FUNCTION + Impl::TeamThreadRangeBoundariesStruct + TeamThreadRange(const Impl::HIPTeamMember& thread, iType count) { + return Impl::TeamThreadRangeBoundariesStruct( + thread, count); +} + +template +KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< + typename std::common_type::type, Impl::HIPTeamMember> +TeamThreadRange(const Impl::HIPTeamMember& thread, iType1 begin, iType2 end) { + typedef typename std::common_type::type iType; + return Impl::TeamThreadRangeBoundariesStruct( + thread, iType(begin), iType(end)); +} + +template +KOKKOS_INLINE_FUNCTION + Impl::TeamVectorRangeBoundariesStruct + TeamVectorRange(const Impl::HIPTeamMember& thread, const iType& count) { + return Impl::TeamVectorRangeBoundariesStruct( + thread, count); +} + +template +KOKKOS_INLINE_FUNCTION Impl::TeamVectorRangeBoundariesStruct< + typename std::common_type::type, Impl::HIPTeamMember> +TeamVectorRange(const Impl::HIPTeamMember& thread, const iType1& begin, + const iType2& end) { + typedef typename std::common_type::type iType; + return Impl::TeamVectorRangeBoundariesStruct( + thread, iType(begin), iType(end)); +} + +template +KOKKOS_INLINE_FUNCTION + Impl::ThreadVectorRangeBoundariesStruct + ThreadVectorRange(const Impl::HIPTeamMember& thread, iType count) { + return Impl::ThreadVectorRangeBoundariesStruct( + thread, count); +} + +template +KOKKOS_INLINE_FUNCTION + Impl::ThreadVectorRangeBoundariesStruct + ThreadVectorRange(const Impl::HIPTeamMember& thread, iType arg_begin, + iType arg_end) { + return Impl::ThreadVectorRangeBoundariesStruct( + thread, arg_begin, arg_end); +} + +KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct PerTeam( + const Impl::HIPTeamMember& thread) { + return Impl::ThreadSingleStruct(thread); +} + +KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct PerThread( + const Impl::HIPTeamMember& thread) { + return Impl::VectorSingleStruct(thread); +} + +//---------------------------------------------------------------------------- + +/** \brief Inter-thread parallel_for. + * + * Executes closure(iType i) for each i=[0..N). + * + * The range [0..N) is mapped to all threads of the the calling thread team. + */ +template +KOKKOS_INLINE_FUNCTION void parallel_for( + const Impl::TeamThreadRangeBoundariesStruct& + loop_boundaries, + const Closure& closure) { +#ifdef __HIP_DEVICE_COMPILE__ + for (iType i = loop_boundaries.start + hipThreadIdx_y; + i < loop_boundaries.end; i += hipBlockDim_y) + closure(i); +#else + (void)loop_boundaries; + (void)closure; +#endif +} + +//---------------------------------------------------------------------------- + +/** \brief Inter-thread parallel_reduce with a reducer. + * + * Executes closure(iType i, ValueType & val) for each i=[0..N) + * + * The range [0..N) is mapped to all threads of the + * calling thread team and a summation of val is + * performed and put into result. + */ +template +KOKKOS_INLINE_FUNCTION + typename std::enable_if::value>::type + parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::HIPTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { +#ifdef __HIP_DEVICE_COMPILE__ + typename ReducerType::value_type value; + reducer.init(value); + + for (iType i = loop_boundaries.start + hipThreadIdx_y; + i < loop_boundaries.end; i += hipBlockDim_y) { + closure(i, value); + } + + loop_boundaries.member.team_reduce(reducer, value); +#else + (void)loop_boundaries; + (void)closure; + (void)reducer; +#endif +} + +/** \brief Inter-thread parallel_reduce assuming summation. + * + * Executes closure(iType i, ValueType & val) for each i=[0..N) + * + * The range [0..N) is mapped to all threads of the + * calling thread team and a summation of val is + * performed and put into result. + */ +template +KOKKOS_INLINE_FUNCTION + typename std::enable_if::value>::type + parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct< + iType, Impl::HIPTeamMember>& loop_boundaries, + const Closure& closure, ValueType& result) { +#ifdef __HIP_DEVICE_COMPILE__ + ValueType val; + Kokkos::Sum reducer(val); + + reducer.init(reducer.reference()); + + for (iType i = loop_boundaries.start + hipThreadIdx_y; + i < loop_boundaries.end; i += hipBlockDim_y) { + closure(i, val); + } + + loop_boundaries.member.team_reduce(reducer, val); + result = reducer.reference(); +#else + (void)loop_boundaries; + (void)closure; + (void)result; +#endif +} + +template +KOKKOS_INLINE_FUNCTION void parallel_for( + const Impl::TeamVectorRangeBoundariesStruct& + loop_boundaries, + const Closure& closure) { +#ifdef __HIP_DEVICE_COMPILE__ + for (iType i = loop_boundaries.start + hipThreadIdx_y * hipBlockDim_x + + hipThreadIdx_x; + i < loop_boundaries.end; i += hipBlockDim_y * hipBlockDim_x) + closure(i); +#else + (void)loop_boundaries; + (void)closure; +#endif +} + +template +KOKKOS_INLINE_FUNCTION + typename std::enable_if::value>::type + parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::HIPTeamMember>& loop_boundaries, + const Closure& closure, const ReducerType& reducer) { +#ifdef __HIP_DEVICE_COMPILE__ + typename ReducerType::value_type value; + reducer.init(value); + + for (iType i = loop_boundaries.start + hipThreadIdx_y * hipBlockDim_x + + hipThreadIdx_x; + i < loop_boundaries.end; i += hipBlockDim_y * hipBlockDim_x) { + closure(i, value); + } + + loop_boundaries.member.vector_reduce(reducer, value); + loop_boundaries.member.team_reduce(reducer, value); +#else + (void)loop_boundaries; + (void)closure; + (void)reducer; +#endif +} + +template +KOKKOS_INLINE_FUNCTION + typename std::enable_if::value>::type + parallel_reduce(const Impl::TeamVectorRangeBoundariesStruct< + iType, Impl::HIPTeamMember>& loop_boundaries, + const Closure& closure, ValueType& result) { +#ifdef __HIP_DEVICE_COMPILE__ + ValueType val; + Kokkos::Sum reducer(val); + + reducer.init(reducer.reference()); + + for (iType i = loop_boundaries.start + hipThreadIdx_y * hipBlockDim_x + + hipThreadIdx_x; + i < loop_boundaries.end; i += hipBlockDim_y * hipBlockDim_x) { + closure(i, val); + } + + loop_boundaries.member.vector_reduce(reducer); + loop_boundaries.member.team_reduce(reducer); + result = reducer.reference(); +#else + (void)loop_boundaries; + (void)closure; + (void)result; +#endif +} + +//---------------------------------------------------------------------------- + +/** \brief Intra-thread vector parallel_for. + * + * Executes closure(iType i) for each i=[0..N) + * + * The range [0..N) is mapped to all vector lanes of the the calling thread. + */ +template +KOKKOS_INLINE_FUNCTION void parallel_for( + const Impl::ThreadVectorRangeBoundariesStruct& + loop_boundaries, + const Closure& closure) { +#ifdef __HIP_DEVICE_COMPILE__ + for (iType i = loop_boundaries.start + hipThreadIdx_x; + i < loop_boundaries.end; i += hipBlockDim_x) { + closure(i); + } +#else + (void)loop_boundaries; + (void)closure; +#endif +} + +//---------------------------------------------------------------------------- + +/** \brief Intra-thread vector parallel_reduce. + * + * Calls closure(iType i, ValueType & val) for each i=[0..N). + * + * The range [0..N) is mapped to all vector lanes of + * the calling thread and a reduction of val is performed using += + * and output into result. + * + * The identity value for the += operator is assumed to be the default + * constructed value. + */ +template +KOKKOS_INLINE_FUNCTION + typename std::enable_if::value>::type + parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::HIPTeamMember> const& loop_boundaries, + Closure const& closure, ReducerType const& reducer) { +#ifdef __HIP_DEVICE_COMPILE__ + reducer.init(reducer.reference()); + + for (iType i = loop_boundaries.start + hipThreadIdx_x; + i < loop_boundaries.end; i += hipBlockDim_x) { + closure(i, reducer.reference()); + } + + Impl::HIPTeamMember::vector_reduce(reducer); +#else + (void)loop_boundaries; + (void)closure; + (void)reducer; +#endif +} + +/** \brief Intra-thread vector parallel_reduce. + * + * Calls closure(iType i, ValueType & val) for each i=[0..N). + * + * The range [0..N) is mapped to all vector lanes of + * the calling thread and a reduction of val is performed using += + * and output into result. + * + * The identity value for the += operator is assumed to be the default + * constructed value. + */ +template +KOKKOS_INLINE_FUNCTION + typename std::enable_if::value>::type + parallel_reduce(Impl::ThreadVectorRangeBoundariesStruct< + iType, Impl::HIPTeamMember> const& loop_boundaries, + Closure const& closure, ValueType& result) { +#ifdef __HIP_DEVICE_COMPILE__ + result = ValueType(); + + for (iType i = loop_boundaries.start + hipThreadIdx_x; + i < loop_boundaries.end; i += hipBlockDim_x) { + closure(i, result); + } + + Impl::HIPTeamMember::vector_reduce(Kokkos::Sum(result)); +#else + (void)loop_boundaries; + (void)closure; + (void)result; +#endif +} + +//---------------------------------------------------------------------------- + +/** \brief Intra-thread vector parallel exclusive prefix sum. + * + * Executes closure(iType i, ValueType & val, bool final) for each i=[0..N) + * + * The range [0..N) is mapped to all vector lanes in the + * thread and a scan operation is performed. + * The last call to closure has final == true. + */ +template +KOKKOS_INLINE_FUNCTION void parallel_scan( + const Impl::ThreadVectorRangeBoundariesStruct& + loop_boundaries, + const Closure& closure) { +#ifdef __HIP_DEVICE_COMPILE__ + // Extract value_type from closure + + using value_type = typename Kokkos::Impl::FunctorAnalysis< + Kokkos::Impl::FunctorPatternInterface::SCAN, void, Closure>::value_type; + + // Loop through boundaries by vector-length chunks + // must scan at each iteration + + value_type accum = 0; + + // All thread "lanes" must loop the same number of times. + // Determine an loop end for all thread "lanes." + // Requires: + // hipBlockDim_x is power of two and thus + // ( end % hipBlockDim_x ) == ( end & ( hipBlockDim_x - 1 ) ) + // 1 <= hipBlockDim_x <= HIPTraits::WarpSize + + int constexpr warp_size = ::Kokkos::Experimental::Impl::HIPTraits::WarpSize; + const int mask = hipBlockDim_x - 1; + const unsigned active_mask = + blockDim.x == warp_size + ? 0xffffffff + : ((1 << hipBlockDim_x) - 1) + << (hipThreadIdx_y % (warp_size / hipBlockDim_x)) * + hipBlockDim_x; + const int rem = loop_boundaries.end & mask; // == end % hipBlockDim_x + const int end = loop_boundaries.end + (rem ? hipBlockDim_x - rem : 0); + + for (int i = hipThreadIdx_x; i < end; i += hipBlockDim_x) { + value_type val = 0; + + // First acquire per-lane contributions: + if (i < loop_boundaries.end) closure(i, val, false); + + value_type sval = val; + + // Bottom up inclusive scan in triangular pattern + // where each HIP thread is the root of a reduction tree + // from the zeroth "lane" to itself. + // [t] += [t-1] if t >= 1 + // [t] += [t-2] if t >= 2 + // [t] += [t-4] if t >= 4 + // ... + + for (int j = 1; j < static_cast(hipBlockDim_x); j <<= 1) { + value_type tmp = 0; + ::Kokkos::Experimental::Impl::in_place_shfl_up( + tmp, sval, j, hipBlockDim_x, active_mask); + if (j <= static_cast(hipThreadIdx_x)) { + sval += tmp; + } + } + + // Include accumulation and remove value for exclusive scan: + val = accum + sval - val; + + // Provide exclusive scan value: + if (i < loop_boundaries.end) closure(i, val, true); + + // Accumulate the last value in the inclusive scan: + ::Kokkos::Experimental::Impl::in_place_shfl(sval, sval, mask, blockDim.x, + active_mask); + + accum += sval; + } +#else + (void)loop_boundaries; + (void)closure; +#endif +} + +} // namespace Kokkos + +namespace Kokkos { + +template +KOKKOS_INLINE_FUNCTION void single( + const Impl::VectorSingleStruct&, + const FunctorType& lambda) { +#ifdef __HIP_DEVICE_COMPILE__ + if (hipThreadIdx_x == 0) lambda(); +#else + (void)lambda; +#endif +} + +template +KOKKOS_INLINE_FUNCTION void single( + const Impl::ThreadSingleStruct&, + const FunctorType& lambda) { +#ifdef __HIP_DEVICE_COMPILE__ + if (hipThreadIdx_x == 0 && hipThreadIdx_y == 0) lambda(); +#else + (void)lambda; +#endif +} + +template +KOKKOS_INLINE_FUNCTION void single( + const Impl::VectorSingleStruct&, + const FunctorType& lambda, ValueType& val) { +#ifdef __HIP_DEVICE_COMPILE__ + int constexpr warp_size = ::Kokkos::Experimental::Impl::HIPTraits::WarpSize; + if (hipThreadIdx_x == 0) lambda(val); + unsigned mask = hipBlockDim_x == warp_size + ? 0xffffffff + : ((1 << hipBlockDim_x) - 1) + << ((hipThreadIdx_y % (warp_size / hipBlockDim_x)) * + hipBlockDim_x); + ::Kokkos::Experimental::Impl::in_place_shfl(val, val, 0, hipBlockDim_x, mask); +#else + (void)lambda; + (void)val; +#endif +} + +template +KOKKOS_INLINE_FUNCTION void single( + const Impl::ThreadSingleStruct& single_struct, + const FunctorType& lambda, ValueType& val) { + (void)single_struct; + (void)lambda; + (void)val; +#ifdef __HIP_DEVICE_COMPILE__ + if (hipThreadIdx_x == 0 && hipThreadIdx_y == 0) { + lambda(val); + } + single_struct.team_member.team_broadcast(val, 0); +#else + (void)single_struct; + (void)lambda; + (void)val; +#endif +} + +} // namespace Kokkos + +#endif /* defined( __HIPCC__ ) */ + +#endif /* #ifndef KOKKOS_CUDA_TEAM_HPP */ diff --git a/core/src/HIP/Kokkos_HIP_Vectorization.hpp b/core/src/HIP/Kokkos_HIP_Vectorization.hpp new file mode 100644 index 00000000000..58b5abb2eef --- /dev/null +++ b/core/src/HIP/Kokkos_HIP_Vectorization.hpp @@ -0,0 +1,160 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIP_VECTORIZATION_HPP +#define KOKKOS_HIP_VECTORIZATION_HPP + +#include +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +// Include all lanes +constexpr unsigned shfl_all_mask = 0xffffffff; + +//---------------------------------------------------------------------------- +// Shuffle operations require input to be a register (stack) variable + +// Derived implements do_shfl_op(unsigned mask, T& in, int lane, int width), +// which turns in to one of KOKKOS_IMPL_HIP_SHFL(_UP_|_DOWN_|_)MASK +// Since the logic with respect to value sizes, etc., is the same everywhere, +// put it all in one place. +template +struct in_place_shfl_op { + // CRTP boilerplate + __device__ KOKKOS_IMPL_FORCEINLINE const Derived& self() const noexcept { + return *static_cast(this); + } + + // sizeof(Scalar) == sizeof(int) case + template + // requires _assignable_from_bits + __device__ inline typename std::enable_if::type + operator()(Scalar& out, Scalar const& in, int lane_or_delta, int width, + unsigned mask = shfl_all_mask) const noexcept { + //------------------------------------------------ + reinterpret_cast(out) = self().do_shfl_op( + mask, reinterpret_cast(in), lane_or_delta, width); + //------------------------------------------------ + } + + template + __device__ inline + typename std::enable_if::type + operator()(Scalar& out, Scalar const& in, int lane_or_delta, int width, + unsigned mask = shfl_all_mask) const noexcept { + //------------------------------------------------ + reinterpret_cast(out) = self().do_shfl_op( + mask, *reinterpret_cast(&in), lane_or_delta, width); + //------------------------------------------------ + } + + // sizeof(Scalar) > sizeof(double) case + template + __device__ inline + typename std::enable_if<(sizeof(Scalar) > sizeof(double))>::type + operator()(Scalar& out, const Scalar& val, int lane_or_delta, int width, + unsigned mask = shfl_all_mask) const noexcept { + using shuffle_as_t = int; + enum : int { N = sizeof(Scalar) / sizeof(shuffle_as_t) }; + + for (int i = 0; i < N; ++i) { + reinterpret_cast(&out)[i] = self().do_shfl_op( + mask, reinterpret_cast(&val)[i], lane_or_delta, + width); + } + } +}; + +struct in_place_shfl_fn : in_place_shfl_op { + template + __device__ KOKKOS_IMPL_FORCEINLINE T do_shfl_op(unsigned mask, T& val, + int lane, int width) const + noexcept { + return KOKKOS_IMPL_HIP_SHFL_MASK(mask, val, lane, width); + } +}; + +template +__device__ KOKKOS_IMPL_FORCEINLINE void in_place_shfl(Args&&... args) noexcept { + in_place_shfl_fn{}((Args &&) args...); +} + +struct in_place_shfl_up_fn : in_place_shfl_op { + template + __device__ KOKKOS_IMPL_FORCEINLINE T do_shfl_op(unsigned mask, T& val, + int lane, int width) const + noexcept { + return KOKKOS_IMPL_HIP_SHFL_UP_MASK(mask, val, lane, width); + } +}; + +template +__device__ KOKKOS_IMPL_FORCEINLINE void in_place_shfl_up( + Args&&... args) noexcept { + in_place_shfl_up_fn{}((Args &&) args...); +} + +struct in_place_shfl_down_fn : in_place_shfl_op { + template + __device__ KOKKOS_IMPL_FORCEINLINE T do_shfl_op(unsigned mask, T& val, + int lane, int width) const + noexcept { + return KOKKOS_IMPL_HIP_SHFL_DOWN_MASK(mask, val, lane, width); + } +}; + +template +__device__ KOKKOS_IMPL_FORCEINLINE void in_place_shfl_down( + Args&&... args) noexcept { + in_place_shfl_down_fn{}((Args &&) args...); +} + +} // namespace Impl +} // namespace Experimental +} // namespace Kokkos + +#endif diff --git a/core/src/HPX/Kokkos_HPX.cpp b/core/src/HPX/Kokkos_HPX.cpp index 0c4cca70f8e..acbd1074fde 100644 --- a/core/src/HPX/Kokkos_HPX.cpp +++ b/core/src/HPX/Kokkos_HPX.cpp @@ -85,6 +85,9 @@ void HPX::impl_initialize(int thread_count) { char *argv_hpx[] = {name, nullptr}; hpx::start(nullptr, argc_hpx, argv_hpx, config); +#if HPX_VERSION_FULL < 0x010400 + // This has been fixed in HPX 1.4.0. + // // NOTE: Wait for runtime to start. hpx::start returns as soon as // possible, meaning some operations are not allowed immediately // after hpx::start. Notably, hpx::stop needs state_running. This @@ -94,6 +97,7 @@ void HPX::impl_initialize(int thread_count) { rt = hpx::get_runtime_ptr(); hpx::util::yield_while( [rt]() { return rt->get_state() < hpx::state_running; }); +#endif m_hpx_initialized = true; } diff --git a/core/src/KokkosExp_MDRangePolicy.hpp b/core/src/KokkosExp_MDRangePolicy.hpp index 7981c04b4f4..3195dbdedf0 100644 --- a/core/src/KokkosExp_MDRangePolicy.hpp +++ b/core/src/KokkosExp_MDRangePolicy.hpp @@ -63,6 +63,10 @@ #include #endif +#if defined(__HIPCC__) && defined(KOKKOS_ENABLE_HIP) +#include +#endif + namespace Kokkos { // ------------------------------------------------------------------ // @@ -79,7 +83,8 @@ enum class Iterate template struct default_outer_direction { using type = Iterate; -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) || \ + defined(KOKKOS_ENABLE_HIP) static constexpr Iterate value = Iterate::Left; #else static constexpr Iterate value = Iterate::Right; @@ -89,7 +94,8 @@ struct default_outer_direction { template struct default_inner_direction { using type = Iterate; -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) || \ + defined(KOKKOS_ENABLE_HIP) static constexpr Iterate value = Iterate::Left; #else static constexpr Iterate value = Iterate::Right; @@ -256,6 +262,10 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits { #if defined(KOKKOS_ENABLE_ROCM) && !std::is_same::value +#endif +#if defined(KOKKOS_ENABLE_HIP) + && !std::is_same::value #endif ) { index_type span; @@ -275,7 +285,7 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits { m_prod_tile_dims *= m_tile[i]; } } -#if defined(KOKKOS_ENABLE_CUDA) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) else // Cuda { index_type span; @@ -287,15 +297,21 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits { rank_start = rank - 1; rank_end = -1; } + bool is_cuda_exec_space = +#if defined(KOKKOS_ENABLE_CUDA) + std::is_same::value; +#else + false; +#endif for (int i = rank_start; i != rank_end; i += increment) { span = m_upper[i] - m_lower[i]; if (m_tile[i] <= 0) { - // TODO: determine what is a good default tile size for cuda + // TODO: determine what is a good default tile size for cuda and HIP // may be rank dependent if (((int)inner_direction == (int)Right && (i < rank - 1)) || ((int)inner_direction == (int)Left && (i > 0))) { if (m_prod_tile_dims < 256) { - m_tile[i] = 2; + m_tile[i] = (is_cuda_exec_space) ? 2 : 4; } else { m_tile[i] = 1; } @@ -311,13 +327,18 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits { if (m_prod_tile_dims > 1024) { // Match Cuda restriction for ParallelReduce; 1024,1024,64 // max per dim (Kepler), but product num_threads < 1024 - printf(" Tile dimensions exceed Cuda limits\n"); - Kokkos::abort( - " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of " - "threads per block - choose smaller tile dims"); - // Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: - // MDRange tile dims exceed maximum number of threads per block - choose - // smaller tile dims"); + if (is_cuda_exec_space) { + printf(" Tile dimensions exceed Cuda limits\n"); + Kokkos::abort( + " Cuda ExecSpace Error: MDRange tile dims exceed maximum number " + "of " + "threads per block - choose smaller tile dims"); + } else { + printf(" Tile dimensions exceed HIP limits\n"); + Kokkos::abort( + "HIP ExecSpace Error: MDRange tile dims exceed maximum number of " + "threads per block - choose smaller tile dims"); + } } } #endif @@ -396,6 +417,10 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits { #if defined(KOKKOS_ENABLE_ROCM) && !std::is_same::value +#endif +#if defined(KOKKOS_ENABLE_HIP) + && !std::is_same::value #endif ) { index_type span; @@ -415,8 +440,8 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits { m_prod_tile_dims *= m_tile[i]; } } -#if defined(KOKKOS_ENABLE_CUDA) - else // Cuda +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) + else // Cuda or HIP { index_type span; int increment = 1; @@ -451,13 +476,17 @@ struct MDRangePolicy : public Kokkos::Impl::PolicyTraits { if (m_prod_tile_dims > 1024) { // Match Cuda restriction for ParallelReduce; 1024,1024,64 // max per dim (Kepler), but product num_threads < 1024 +#if defined(KOKKOS_ENABLE_CUDA) printf(" Tile dimensions exceed Cuda limits\n"); Kokkos::abort( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of " "threads per block - choose smaller tile dims"); - // Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: - // MDRange tile dims exceed maximum number of threads per block - choose - // smaller tile dims"); +#else + printf(" Tile dimensions exceed HIP limits\n"); + Kokkos::abort( + " HIP ExecSpace Error: MDRange tile dims exceed maximum number of " + "threads per block - choose smaller tile dims"); +#endif } } #endif diff --git a/core/src/Kokkos_Array.hpp b/core/src/Kokkos_Array.hpp index 88e7883cb97..d830616bd67 100644 --- a/core/src/Kokkos_Array.hpp +++ b/core/src/Kokkos_Array.hpp @@ -159,22 +159,21 @@ struct Array { return &m_internal_implementation_private_member_data[0]; } -#ifdef KOKKOS_IMPL_ROCM_CLANG_WORKAROUND +#ifdef KOKKOS_IMPL_HIP_CLANG_WORKAROUND // Do not default unless move and move-assignment are also defined - KOKKOS_INLINE_FUNCTION - ~Array() = default; - Array() = default; - Array(const Array&) = default; - Array& operator=(const Array&) = default; + KOKKOS_DEFAULTED_FUNCTION ~Array() = default; + KOKKOS_DEFAULTED_FUNCTION Array() = default; + KOKKOS_DEFAULTED_FUNCTION Array(const Array&) = default; + KOKKOS_DEFAULTED_FUNCTION Array& operator=(const Array&) = default; // Some supported compilers are not sufficiently C++11 compliant // for default move constructor and move assignment operator. - Array(Array&&) = default; - Array& operator=(Array&&) = default; + KOKKOS_DEFAULTED_FUNCTION Array(Array&&) = default; + KOKKOS_DEFAULTED_FUNCTION Array& operator=(Array&&) = default; KOKKOS_INLINE_FUNCTION Array(const std::initializer_list& vals) { - for (int i = 0; i < N; i++) { + for (size_t i = 0; i < N; i++) { m_internal_implementation_private_member_data[i] = vals.begin()[i]; } } @@ -217,17 +216,10 @@ struct Array { KOKKOS_INLINE_FUNCTION pointer data() { return pointer(0); } KOKKOS_INLINE_FUNCTION const_pointer data() const { return const_pointer(0); } -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - KOKKOS_INLINE_FUNCTION ~Array() {} - KOKKOS_INLINE_FUNCTION Array() {} - KOKKOS_INLINE_FUNCTION Array(const Array&) {} - KOKKOS_INLINE_FUNCTION Array& operator=(const Array&) {} -#else - KOKKOS_INLINE_FUNCTION ~Array() = default; - KOKKOS_INLINE_FUNCTION Array() = default; - KOKKOS_INLINE_FUNCTION Array(const Array&) = default; - KOKKOS_INLINE_FUNCTION Array& operator=(const Array&) = default; -#endif + KOKKOS_DEFAULTED_FUNCTION ~Array() = default; + KOKKOS_DEFAULTED_FUNCTION Array() = default; + KOKKOS_DEFAULTED_FUNCTION Array(const Array&) = default; + KOKKOS_DEFAULTED_FUNCTION Array& operator=(const Array&) = default; // Some supported compilers are not sufficiently C++11 compliant // for default move constructor and move assignment operator. @@ -281,13 +273,9 @@ struct Array::contiguous> { KOKKOS_INLINE_FUNCTION pointer data() { return m_elem; } KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem; } -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - KOKKOS_INLINE_FUNCTION ~Array() {} -#else - KOKKOS_INLINE_FUNCTION ~Array() = default; -#endif - Array() = delete; - Array(const Array& rhs) = delete; + KOKKOS_DEFAULTED_FUNCTION ~Array() = default; + KOKKOS_INLINE_FUNCTION_DELETED Array() = delete; + KOKKOS_INLINE_FUNCTION_DELETED Array(const Array& rhs) = delete; // Some supported compilers are not sufficiently C++11 compliant // for default move constructor and move assignment operator. @@ -354,13 +342,9 @@ struct Array::strided> { KOKKOS_INLINE_FUNCTION pointer data() { return m_elem; } KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem; } -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - KOKKOS_INLINE_FUNCTION ~Array() {} -#else - KOKKOS_INLINE_FUNCTION ~Array() = default; -#endif - Array() = delete; - Array(const Array&) = delete; + KOKKOS_DEFAULTED_FUNCTION ~Array() = default; + KOKKOS_INLINE_FUNCTION_DELETED Array() = delete; + KOKKOS_INLINE_FUNCTION_DELETED Array(const Array&) = delete; // Some supported compilers are not sufficiently C++11 compliant // for default move constructor and move assignment operator. diff --git a/core/src/Kokkos_Atomic.hpp b/core/src/Kokkos_Atomic.hpp index c4f7fa3ec14..55139d07b94 100644 --- a/core/src/Kokkos_Atomic.hpp +++ b/core/src/Kokkos_Atomic.hpp @@ -86,6 +86,10 @@ #define KOKKOS_ENABLE_ROCM_ATOMICS +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HIP_GPU) + +#define KOKKOS_ENABLE_HIP_ATOMICS + #endif #if !defined(KOKKOS_ENABLE_GNU_ATOMICS) && \ @@ -178,11 +182,13 @@ extern KOKKOS_INLINE_FUNCTION void unlock_address_rocm_space(void* ptr); } // namespace Kokkos #include #endif +#if defined(KOKKOS_ENABLE_HIP) +#include +#endif #ifdef _WIN32 #include "impl/Kokkos_Atomic_Windows.hpp" #else - //---------------------------------------------------------------------------- // Atomic Assembly // @@ -209,6 +215,11 @@ extern KOKKOS_INLINE_FUNCTION void unlock_address_rocm_space(void* ptr); #include "impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp" +#endif //_WIN32 + +#include "impl/Kokkos_Atomic_Generic.hpp" + +#ifndef _WIN32 //---------------------------------------------------------------------------- // Atomic fetch and add // @@ -262,6 +273,18 @@ extern KOKKOS_INLINE_FUNCTION void unlock_address_rocm_space(void* ptr); // { T tmp = *dest ; *dest = tmp & val ; return tmp ; } #include "impl/Kokkos_Atomic_Fetch_And.hpp" + +//---------------------------------------------------------------------------- +// Atomic MinMax +// +// template +// T atomic_min(volatile T* const dest, const T val) +// { T tmp = *dest ; *dest = min(*dest, val); return tmp ; } +// template +// T atomic_max(volatile T* const dest, const T val) +// { T tmp = *dest ; *dest = max(*dest, val); return tmp ; } + +#include "impl/Kokkos_Atomic_MinMax.hpp" #endif /*Not _WIN32*/ //---------------------------------------------------------------------------- @@ -284,16 +307,14 @@ extern KOKKOS_INLINE_FUNCTION void unlock_address_rocm_space(void* ptr); #include "impl/Kokkos_Volatile_Load.hpp" -#ifndef _WIN32 -#include "impl/Kokkos_Atomic_Generic.hpp" -#endif - //---------------------------------------------------------------------------- // Provide atomic loads and stores with memory order semantics #include "impl/Kokkos_Atomic_Load.hpp" #include "impl/Kokkos_Atomic_Store.hpp" +// Generic functions using the above defined functions +#include "impl/Kokkos_Atomic_Generic_Secondary.hpp" //---------------------------------------------------------------------------- // This atomic-style macro should be an inlined function, not a macro diff --git a/core/src/Kokkos_Complex.hpp b/core/src/Kokkos_Complex.hpp index a9af073b419..fec5d62c3dd 100644 --- a/core/src/Kokkos_Complex.hpp +++ b/core/src/Kokkos_Complex.hpp @@ -73,14 +73,14 @@ class using value_type = RealType; //! Default constructor (initializes both real and imaginary parts to zero). - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION complex() noexcept = default; //! Copy constructor. - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION complex(const complex&) noexcept = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION complex& operator=(const complex&) noexcept = default; /// \brief Conversion constructor from compatible RType @@ -219,7 +219,10 @@ class // Scale (by the "1-norm" of y) to avoid unwarranted overflow. // If the real part is +/-Inf and the imaginary part is -/+Inf, // this won't change the result. - const RealType s = std::fabs(y.real()) + std::fabs(y.imag()); +#if !defined(__HIP_DEVICE_COMPILE__) // FIXME_HIP + using std::fabs; +#endif + const RealType s = fabs(y.real()) + fabs(y.imag()); // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0. // In that case, the relation x/y == (x/s) / (y/s) doesn't hold, @@ -247,7 +250,10 @@ class // Scale (by the "1-norm" of y) to avoid unwarranted overflow. // If the real part is +/-Inf and the imaginary part is -/+Inf, // this won't change the result. - const RealType s = std::fabs(y.real()) + std::fabs(y.imag()); +#if !defined(__HIP_DEVICE_COMPILE__) // FIXME_HIP + using std::fabs; +#endif + const RealType s = fabs(y.real()) + fabs(y.imag()); // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0. // In that case, the relation x/y == (x/s) / (y/s) doesn't hold, @@ -298,8 +304,21 @@ class /// complex& . See Kokkos Issue #177 for the /// explanation. In practice, this means that you should not chain /// assignments with volatile lvalues. - KOKKOS_INLINE_FUNCTION void operator=( - const complex& src) volatile noexcept { + // + // Templated, so as not to be a copy assignment operator (Kokkos issue #2577) + // Intended to behave as + // void operator=(const complex&) volatile noexcept + // + // Use cases: + // complex r; + // const complex cr; + // volatile complex vl; + // vl = r; + // vl = cr; + template ::value, + int>::type = 0> + KOKKOS_INLINE_FUNCTION void operator=(const Complex& src) volatile noexcept { re_ = src.re_; im_ = src.im_; // We deliberately do not return anything here. See explanation @@ -308,16 +327,45 @@ class //! Assignment operator, volatile LHS and volatile RHS // TODO Should this return void like the other volatile assignment operators? + // + // Templated, so as not to be a copy assignment operator (Kokkos issue #2577) + // Intended to behave as + // volatile complex& operator=(const volatile complex&) volatile noexcept + // + // Use cases: + // volatile complex vr; + // const volatile complex cvr; + // volatile complex vl; + // vl = vr; + // vl = cvr; + template ::value, + int>::type = 0> KOKKOS_INLINE_FUNCTION volatile complex& operator=( - const volatile complex& src) volatile noexcept { + const volatile Complex& src) volatile noexcept { re_ = src.re_; im_ = src.im_; return *this; } //! Assignment operator, volatile RHS and non-volatile LHS + // + // Templated, so as not to be a copy assignment operator (Kokkos issue #2577) + // Intended to behave as + // complex& operator=(const volatile complex&) noexcept + // + // Use cases: + // volatile complex vr; + // const volatile complex cvr; + // complex l; + // l = vr; + // l = cvr; + // + template ::value, + int>::type = 0> KOKKOS_INLINE_FUNCTION complex& operator=( - const volatile complex& src) noexcept { + const volatile Complex& src) noexcept { re_ = src.re_; im_ = src.im_; return *this; @@ -650,7 +698,8 @@ KOKKOS_INLINE_FUNCTION RealType real(const complex& x) noexcept { //! Absolute value (magnitude) of a complex number. template KOKKOS_INLINE_FUNCTION RealType abs(const complex& x) { -#ifndef __CUDA_ARCH__ +#if !defined(__CUDA_ARCH__) && \ + !defined(__HIP_DEVICE_COMPILE__) // FIXME_CUDA FIXME_HIP using std::hypot; #endif return hypot(x.real(), x.imag()); @@ -660,20 +709,32 @@ KOKKOS_INLINE_FUNCTION RealType abs(const complex& x) { template KOKKOS_INLINE_FUNCTION Kokkos::complex pow(const complex& x, const RealType& e) { - RealType r = abs(x); - RealType phi = std::atan(x.imag() / x.real()); - return std::pow(r, e) * - Kokkos::complex(std::cos(phi * e), std::sin(phi * e)); + RealType r = abs(x); +#if !defined(__HIP_DEVICE_COMPILE__) // FIXME_HIP + using std::atan; + using std::cos; + using std::pow; + using std::sin; +#endif + using ::pow; + RealType phi = atan(x.imag() / x.real()); + return pow(r, e) * Kokkos::complex(cos(phi * e), sin(phi * e)); } //! Square root of a complex number. template KOKKOS_INLINE_FUNCTION Kokkos::complex sqrt( const complex& x) { - RealType r = abs(x); - RealType phi = std::atan(x.imag() / x.real()); - return std::sqrt(r) * - Kokkos::complex(std::cos(phi * 0.5), std::sin(phi * 0.5)); + RealType r = abs(x); +#if !defined(__HIP_DEVICE_COMPILE__) // FIXME_HIP + using std::atan; + using std::cos; + using std::sin; + using std::sqrt; +#endif + using ::sqrt; + RealType phi = atan(x.imag() / x.real()); + return sqrt(r) * Kokkos::complex(cos(phi * 0.5), sin(phi * 0.5)); } //! Conjugate of a complex number. @@ -686,8 +747,14 @@ KOKKOS_INLINE_FUNCTION complex conj( //! Exponential of a complex number. template KOKKOS_INLINE_FUNCTION complex exp(const complex& x) { - return std::exp(x.real()) * - complex(std::cos(x.imag()), std::sin(x.imag())); +#if !defined(__HIP_DEVICE_COMPILE__) // FIXME_HIP + using std::cos; + using std::exp; + using std::sin; +#else + using ::exp; +#endif + return exp(x.real()) * complex(cos(x.imag()), sin(x.imag())); } /// This function cannot be called in a CUDA device function, @@ -720,9 +787,12 @@ KOKKOS_INLINE_FUNCTION // Scale (by the "1-norm" of y) to avoid unwarranted overflow. // If the real part is +/-Inf and the imaginary part is -/+Inf, // this won't change the result. +#if !defined(__HIP_DEVICE_COMPILE__) // FIXME_HIP + using std::fabs; +#endif typedef typename std::common_type::type common_real_type; - const common_real_type s = std::fabs(real(y)) + std::fabs(imag(y)); + const common_real_type s = fabs(real(y)) + fabs(imag(y)); // If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0. // In that case, the relation x/y == (x/s) / (y/s) doesn't hold, diff --git a/core/src/Kokkos_Concepts.hpp b/core/src/Kokkos_Concepts.hpp index abfa88e1d2f..13d7925c127 100644 --- a/core/src/Kokkos_Concepts.hpp +++ b/core/src/Kokkos_Concepts.hpp @@ -153,19 +153,17 @@ namespace Kokkos { template \ struct have : std::false_type {}; \ template \ - struct have::type, \ - typename std::remove_cv::type>::type> \ + struct have::type> \ : std::true_type {}; \ template \ struct have::type, \ - typename std::remove_cv::type>::type> \ + typename std::is_base_of::type> \ : std::true_type {}; \ \ public: \ - enum { value = is_##CONCEPT::template have::value }; \ + static constexpr bool value = \ + is_##CONCEPT::template have::type>::value; \ + constexpr operator bool() const noexcept { return value; } \ }; // Public concept: @@ -205,6 +203,43 @@ KOKKOS_IMPL_IS_CONCEPT(host_thread_team_member) } // namespace Kokkos +namespace Kokkos { +namespace Impl { + +template +class has_member_team_shmem_size { + template + static int32_t test_for_member(decltype(&T::team_shmem_size)) { + return int32_t(0); + } + template + static int64_t test_for_member(...) { + return int64_t(0); + } + + public: + constexpr static bool value = + sizeof(test_for_member(0)) == sizeof(int32_t); +}; + +template +class has_member_shmem_size { + template + static int32_t test_for_member(decltype(&T::shmem_size_me)) { + return int32_t(0); + } + template + static int64_t test_for_member(...) { + return int64_t(0); + } + + public: + constexpr static bool value = + sizeof(test_for_member(0)) == sizeof(int32_t); +}; + +} // namespace Impl +} // namespace Kokkos //---------------------------------------------------------------------------- namespace Kokkos { @@ -220,6 +255,23 @@ struct Device { typedef Device device_type; }; +namespace Impl { + +template +struct is_device_helper : std::false_type {}; + +template +struct is_device_helper> : std::true_type { +}; + +} // namespace Impl + +template +using is_device = + typename Impl::is_device_helper::type>::type; + +//---------------------------------------------------------------------------- + template struct is_space { private: @@ -259,12 +311,17 @@ struct is_space { typedef typename U::device_type space; }; - typedef typename is_space::template exe is_exe; - typedef typename is_space::template mem is_mem; - typedef typename is_space::template dev is_dev; + typedef typename is_space::template exe::type> + is_exe; + typedef typename is_space::template mem::type> + is_mem; + typedef typename is_space::template dev::type> + is_dev; public: - enum { value = is_exe::value || is_mem::value || is_dev::value }; + static constexpr bool value = is_exe::value || is_mem::value || is_dev::value; + + constexpr operator bool() const noexcept { return value; } typedef typename is_exe::space execution_space; typedef typename is_mem::space memory_space; @@ -300,11 +357,11 @@ struct is_space { typedef typename std::conditional< std::is_same::value && std::is_same::value, - T, Kokkos::Device >::type + T, Kokkos::Device>::type host_mirror_space; }; -// For backward compatiblity +// For backward compatibility namespace Impl { @@ -426,7 +483,7 @@ struct SpaceAccessibility { std::is_same::value || !exe_access::accessible, AccessSpace, - Kokkos::Device >::type + Kokkos::Device>::type space; }; diff --git a/core/src/Kokkos_CopyViews.hpp b/core/src/Kokkos_CopyViews.hpp index e64b434d020..810b7127333 100644 --- a/core/src/Kokkos_CopyViews.hpp +++ b/core/src/Kokkos_CopyViews.hpp @@ -84,7 +84,7 @@ struct ViewFill { ViewType a; typename ViewType::const_value_type val; typedef typename ViewType::non_const_value_type ST; - ViewFill(const ViewType&, const ST&); + ViewFill(const ViewType&, const ST&, const ExecSpace&); }; template { ViewType a; typename ViewType::const_value_type val; typedef typename ViewType::non_const_value_type ST; - ViewFill(const ViewType&, const ST&); + ViewFill(const ViewType&, const ST&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType&) const; }; @@ -104,7 +104,7 @@ struct ViewFill { ViewType a; typename ViewType::const_value_type val; typedef typename ViewType::non_const_value_type ST; - ViewFill(const ViewType&, const ST&); + ViewFill(const ViewType&, const ST&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType&, const iType&) const; }; @@ -115,7 +115,7 @@ struct ViewFill { ViewType a; typename ViewType::const_value_type val; typedef typename ViewType::non_const_value_type ST; - ViewFill(const ViewType&, const ST&); + ViewFill(const ViewType&, const ST&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType&, const iType&, const iType&) const; }; @@ -126,7 +126,7 @@ struct ViewFill { ViewType a; typename ViewType::const_value_type val; typedef typename ViewType::non_const_value_type ST; - ViewFill(const ViewType&, const ST&); + ViewFill(const ViewType&, const ST&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType&, const iType&, const iType&, const iType&) const; }; @@ -137,7 +137,7 @@ struct ViewFill { ViewType a; typename ViewType::const_value_type val; typedef typename ViewType::non_const_value_type ST; - ViewFill(const ViewType&, const ST&); + ViewFill(const ViewType&, const ST&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType&, const iType&, const iType&, const iType&, const iType&) const; @@ -149,7 +149,7 @@ struct ViewFill { ViewType a; typename ViewType::const_value_type val; typedef typename ViewType::non_const_value_type ST; - ViewFill(const ViewType&, const ST&); + ViewFill(const ViewType&, const ST&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType&, const iType&, const iType&, const iType&, const iType&, const iType&) const; @@ -161,7 +161,7 @@ struct ViewFill { ViewType a; typename ViewType::const_value_type val; typedef typename ViewType::non_const_value_type ST; - ViewFill(const ViewType&, const ST&); + ViewFill(const ViewType&, const ST&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType&, const iType&, const iType&, const iType&, const iType&, const iType&, const iType&) const; @@ -173,7 +173,7 @@ struct ViewFill { ViewType a; typename ViewType::const_value_type val; typedef typename ViewType::non_const_value_type ST; - ViewFill(const ViewType&, const ST&); + ViewFill(const ViewType&, const ST&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType&, const iType&, const iType&, const iType&, const iType&, const iType&, const iType&, const iType&) const; @@ -183,9 +183,9 @@ template struct ViewFill { typedef typename ViewType::non_const_value_type ST; - ViewFill(const ViewType& a, const ST& val) { - Kokkos::Impl::DeepCopy( - a.data(), &val, sizeof(ST)); + ViewFill(const ViewType& a, const ST& val, const ExecSpace& space) { + Kokkos::Impl::DeepCopy(space, a.data(), &val, sizeof(ST)); } }; @@ -196,12 +196,11 @@ struct ViewFill> policy_type; - ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_) + ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, + const ExecSpace& space) : a(a_), val(val_) { - ExecSpace().fence(); - Kokkos::parallel_for("Kokkos::ViewFill-1D", policy_type(0, a.extent(0)), - *this); - ExecSpace().fence(); + Kokkos::parallel_for("Kokkos::ViewFill-1D", + policy_type(space, 0, a.extent(0)), *this); } KOKKOS_INLINE_FUNCTION @@ -221,13 +220,12 @@ struct ViewFill> policy_type; - ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_) + ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, + const ExecSpace& space) : a(a_), val(val_) { - ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-2D", - policy_type({0, 0}, {a.extent(0), a.extent(1)}), + policy_type(space, {0, 0}, {a.extent(0), a.extent(1)}), *this); - ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -247,13 +245,13 @@ struct ViewFill> policy_type; - ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_) + ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, + const ExecSpace& space) : a(a_), val(val_) { - ExecSpace().fence(); Kokkos::parallel_for( "Kokkos::ViewFill-3D", - policy_type({0, 0, 0}, {a.extent(0), a.extent(1), a.extent(2)}), *this); - ExecSpace().fence(); + policy_type(space, {0, 0, 0}, {a.extent(0), a.extent(1), a.extent(2)}), + *this); } KOKKOS_INLINE_FUNCTION @@ -275,14 +273,14 @@ struct ViewFill> policy_type; - ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_) + ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, + const ExecSpace& space) : a(a_), val(val_) { - ExecSpace().fence(); - Kokkos::parallel_for("Kokkos::ViewFill-4D", - policy_type({0, 0, 0, 0}, {a.extent(0), a.extent(1), - a.extent(2), a.extent(3)}), - *this); - ExecSpace().fence(); + Kokkos::parallel_for( + "Kokkos::ViewFill-4D", + policy_type(space, {0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3)}), + *this); } KOKKOS_INLINE_FUNCTION @@ -305,15 +303,14 @@ struct ViewFill> policy_type; - ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_) + ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, + const ExecSpace& space) : a(a_), val(val_) { - ExecSpace().fence(); - Kokkos::parallel_for( - "Kokkos::ViewFill-5D", - policy_type({0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(2), + Kokkos::parallel_for("Kokkos::ViewFill-5D", + policy_type(space, {0, 0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4)}), - *this); - ExecSpace().fence(); + *this); } KOKKOS_INLINE_FUNCTION @@ -336,15 +333,14 @@ struct ViewFill> policy_type; - ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_) + ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, + const ExecSpace& space) : a(a_), val(val_) { - ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-6D", - policy_type({0, 0, 0, 0, 0, 0}, + policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4), a.extent(5)}), *this); - ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -367,15 +363,14 @@ struct ViewFill> policy_type; - ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_) + ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, + const ExecSpace& space) : a(a_), val(val_) { - ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-7D", - policy_type({0, 0, 0, 0, 0, 0}, + policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(5), a.extent(6)}), *this); - ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -399,15 +394,14 @@ struct ViewFill> policy_type; - ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_) + ViewFill(const ViewType& a_, typename ViewType::const_value_type& val_, + const ExecSpace& space) : a(a_), val(val_) { - ExecSpace().fence(); Kokkos::parallel_for("Kokkos::ViewFill-8D", - policy_type({0, 0, 0, 0, 0, 0}, + policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(3), a.extent(5), a.extent(6), a.extent(7)}), *this); - ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -424,7 +418,7 @@ template { ViewTypeA a; ViewTypeB b; - ViewCopy(const ViewTypeA&, const ViewTypeB&); + ViewCopy(const ViewTypeA&, const ViewTypeB&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType& i0) const; }; @@ -434,7 +428,7 @@ template { ViewTypeA a; ViewTypeB b; - ViewCopy(const ViewTypeA&, const ViewTypeB&); + ViewCopy(const ViewTypeA&, const ViewTypeB&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType& i0, const iType& i1) const; }; @@ -444,7 +438,7 @@ template { ViewTypeA a; ViewTypeB b; - ViewCopy(const ViewTypeA&, const ViewTypeB&); + ViewCopy(const ViewTypeA&, const ViewTypeB&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType& i0, const iType& i1, const iType& i2) const; }; @@ -454,7 +448,7 @@ template { ViewTypeA a; ViewTypeB b; - ViewCopy(const ViewTypeA&, const ViewTypeB&); + ViewCopy(const ViewTypeA&, const ViewTypeB&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType& i0, const iType& i1, const iType& i2, const iType& i3) const; @@ -465,7 +459,7 @@ template { ViewTypeA a; ViewTypeB b; - ViewCopy(const ViewTypeA&, const ViewTypeB&); + ViewCopy(const ViewTypeA&, const ViewTypeB&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType& i0, const iType& i1, const iType& i2, const iType& i3, const iType& i4) const; @@ -476,7 +470,7 @@ template { ViewTypeA a; ViewTypeB b; - ViewCopy(const ViewTypeA&, const ViewTypeB&); + ViewCopy(const ViewTypeA&, const ViewTypeB&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType& i0, const iType& i1, const iType& i2, const iType& i3, const iType& i4, const iType& i5) const; @@ -487,7 +481,7 @@ template { ViewTypeA a; ViewTypeB b; - ViewCopy(const ViewTypeA&, const ViewTypeB&); + ViewCopy(const ViewTypeA&, const ViewTypeB&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType& i0, const iType& i1, const iType& i2, const iType& i3, const iType& i4, const iType& i5, @@ -499,7 +493,7 @@ template { ViewTypeA a; ViewTypeB b; - ViewCopy(const ViewTypeA&, const ViewTypeB&); + ViewCopy(const ViewTypeA&, const ViewTypeB&, const ExecSpace&); KOKKOS_INLINE_FUNCTION void operator()(const iType& i0, const iType& i1, const iType& i2, const iType& i3, const iType& i4, const iType& i5, @@ -515,11 +509,11 @@ struct ViewCopy> policy_type; - ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_) : a(a_), b(b_) { - ExecSpace().fence(); - Kokkos::parallel_for("Kokkos::ViewCopy-1D", policy_type(0, a.extent(0)), - *this); - ExecSpace().fence(); + ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, + const ExecSpace space = ExecSpace()) + : a(a_), b(b_) { + Kokkos::parallel_for("Kokkos::ViewCopy-1D", + policy_type(space, 0, a.extent(0)), *this); } KOKKOS_INLINE_FUNCTION @@ -542,12 +536,12 @@ struct ViewCopy> policy_type; - ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_) : a(a_), b(b_) { - ExecSpace().fence(); + ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, + const ExecSpace space = ExecSpace()) + : a(a_), b(b_) { Kokkos::parallel_for("Kokkos::ViewCopy-2D", - policy_type({0, 0}, {a.extent(0), a.extent(1)}), + policy_type(space, {0, 0}, {a.extent(0), a.extent(1)}), *this); - ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -573,12 +567,13 @@ struct ViewCopy> policy_type; - ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_) : a(a_), b(b_) { - ExecSpace().fence(); + ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, + const ExecSpace space = ExecSpace()) + : a(a_), b(b_) { Kokkos::parallel_for( "Kokkos::ViewCopy-3D", - policy_type({0, 0, 0}, {a.extent(0), a.extent(1), a.extent(2)}), *this); - ExecSpace().fence(); + policy_type(space, {0, 0, 0}, {a.extent(0), a.extent(1), a.extent(2)}), + *this); } KOKKOS_INLINE_FUNCTION @@ -604,13 +599,14 @@ struct ViewCopy> policy_type; - ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_) : a(a_), b(b_) { - ExecSpace().fence(); - Kokkos::parallel_for("Kokkos::ViewCopy-4D", - policy_type({0, 0, 0, 0}, {a.extent(0), a.extent(1), - a.extent(2), a.extent(3)}), - *this); - ExecSpace().fence(); + ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, + const ExecSpace space = ExecSpace()) + : a(a_), b(b_) { + Kokkos::parallel_for( + "Kokkos::ViewCopy-4D", + policy_type(space, {0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3)}), + *this); } KOKKOS_INLINE_FUNCTION @@ -637,14 +633,14 @@ struct ViewCopy> policy_type; - ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_) : a(a_), b(b_) { - ExecSpace().fence(); - Kokkos::parallel_for( - "Kokkos::ViewCopy-5D", - policy_type({0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(2), + ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, + const ExecSpace space = ExecSpace()) + : a(a_), b(b_) { + Kokkos::parallel_for("Kokkos::ViewCopy-5D", + policy_type(space, {0, 0, 0, 0, 0}, + {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4)}), - *this); - ExecSpace().fence(); + *this); } KOKKOS_INLINE_FUNCTION @@ -671,14 +667,14 @@ struct ViewCopy> policy_type; - ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_) : a(a_), b(b_) { - ExecSpace().fence(); + ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, + const ExecSpace space = ExecSpace()) + : a(a_), b(b_) { Kokkos::parallel_for("Kokkos::ViewCopy-6D", - policy_type({0, 0, 0, 0, 0, 0}, + policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(2), a.extent(3), a.extent(4), a.extent(5)}), *this); - ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -705,14 +701,14 @@ struct ViewCopy> policy_type; - ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_) : a(a_), b(b_) { - ExecSpace().fence(); + ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, + const ExecSpace space = ExecSpace()) + : a(a_), b(b_) { Kokkos::parallel_for("Kokkos::ViewCopy-7D", - policy_type({0, 0, 0, 0, 0, 0}, + policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(3), a.extent(4), a.extent(5), a.extent(6)}), *this); - ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -740,14 +736,14 @@ struct ViewCopy> policy_type; - ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_) : a(a_), b(b_) { - ExecSpace().fence(); + ViewCopy(const ViewTypeA& a_, const ViewTypeB& b_, + const ExecSpace space = ExecSpace()) + : a(a_), b(b_) { Kokkos::parallel_for("Kokkos::ViewCopy-8D", - policy_type({0, 0, 0, 0, 0, 0}, + policy_type(space, {0, 0, 0, 0, 0, 0}, {a.extent(0), a.extent(1), a.extent(3), a.extent(5), a.extent(6), a.extent(7)}), *this); - ExecSpace().fence(); } KOKKOS_INLINE_FUNCTION @@ -767,6 +763,85 @@ struct ViewCopy +void view_copy(const ExecutionSpace& space, const DstType& dst, + const SrcType& src) { + typedef typename DstType::memory_space dst_memory_space; + typedef typename SrcType::memory_space src_memory_space; + + enum { + ExecCanAccessSrc = + Kokkos::Impl::SpaceAccessibility::accessible + }; + enum { + ExecCanAccessDst = + Kokkos::Impl::SpaceAccessibility::accessible + }; + + if (!(ExecCanAccessSrc && ExecCanAccessDst)) { + Kokkos::Impl::throw_runtime_exception( + "Kokkos::Impl::view_copy called with invalid execution space"); + } else { + // Figure out iteration order in case we need it + int64_t strides[DstType::Rank + 1]; + dst.stride(strides); + Kokkos::Iterate iterate; + if (Kokkos::is_layouttiled::value) { + iterate = Kokkos::layout_iterate_type_selector< + typename DstType::array_layout>::outer_iteration_pattern; + } else if (std::is_same::value) { + iterate = Kokkos::Iterate::Right; + } else if (std::is_same::value) { + iterate = Kokkos::Iterate::Left; + } else if (std::is_same::value) { + if (strides[0] > strides[DstType::Rank - 1]) + iterate = Kokkos::Iterate::Right; + else + iterate = Kokkos::Iterate::Left; + } else { + if (std::is_same::value) + iterate = Kokkos::Iterate::Right; + else + iterate = Kokkos::Iterate::Left; + } + + if ((dst.span() >= size_t(std::numeric_limits::max())) || + (src.span() >= size_t(std::numeric_limits::max()))) { + if (iterate == Kokkos::Iterate::Right) + Kokkos::Impl::ViewCopy< + typename DstType::uniform_runtime_nomemspace_type, + typename SrcType::uniform_runtime_const_nomemspace_type, + Kokkos::LayoutRight, ExecutionSpace, DstType::Rank, int64_t>( + dst, src, space); + else + Kokkos::Impl::ViewCopy< + typename DstType::uniform_runtime_nomemspace_type, + typename SrcType::uniform_runtime_const_nomemspace_type, + Kokkos::LayoutLeft, ExecutionSpace, DstType::Rank, int64_t>( + dst, src, space); + } else { + if (iterate == Kokkos::Iterate::Right) + Kokkos::Impl::ViewCopy< + typename DstType::uniform_runtime_nomemspace_type, + typename SrcType::uniform_runtime_const_nomemspace_type, + Kokkos::LayoutRight, ExecutionSpace, DstType::Rank, int>(dst, src, + space); + else + Kokkos::Impl::ViewCopy< + typename DstType::uniform_runtime_nomemspace_type, + typename SrcType::uniform_runtime_const_nomemspace_type, + Kokkos::LayoutLeft, ExecutionSpace, DstType::Rank, int>(dst, src, + space); + } + } +} + template void view_copy(const DstType& dst, const SrcType& src) { typedef typename DstType::execution_space dst_execution_space; @@ -1421,21 +1496,22 @@ inline void deep_copy( const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if::specialize, void>::value>::type* = 0) { + typename ViewTraits::specialize, void>::value>::type* = + nullptr) { typedef View ViewType; + using exec_space_type = typename ViewType::execution_space; #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginDeepCopy( - Kokkos::Profiling::SpaceHandle( - typename ViewType::memory_space().name()), + Kokkos::Profiling::SpaceHandle(ViewType::memory_space::name()), dst.label(), dst.data(), - Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace().name()), "Scalar", + Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace::name()), "Scalar", &value, dst.span() * sizeof(typename ViewType::value_type)); } #endif - if (dst.data() == NULL) { + if (dst.data() == nullptr) { Kokkos::fence(); #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { @@ -1450,7 +1526,7 @@ inline void deep_copy( typename ViewType::value_type>::value, "deep_copy requires non-const type"); - // If contigous we can simply do a 1D flat loop + // If contiguous we can simply do a 1D flat loop if (dst.span_is_contiguous()) { typedef Kokkos::View< typename ViewType::value_type*, Kokkos::LayoutRight, @@ -1463,13 +1539,13 @@ inline void deep_copy( ViewTypeFlat dst_flat(dst.data(), dst.size()); if (dst.span() < static_cast(std::numeric_limits::max())) { - Kokkos::Impl::ViewFill(dst_flat, value); + Kokkos::Impl::ViewFill(dst_flat, value, + exec_space_type()); } else - Kokkos::Impl::ViewFill(dst_flat, value); + Kokkos::Impl::ViewFill(dst_flat, value, + exec_space_type()); Kokkos::fence(); #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { @@ -1511,21 +1587,21 @@ inline void deep_copy( if (dst.span() > static_cast(std::numeric_limits::max())) { if (iterate == Kokkos::Iterate::Right) Kokkos::Impl::ViewFill(dst, value); + exec_space_type, ViewType::Rank, int64_t>( + dst, value, exec_space_type()); else Kokkos::Impl::ViewFill(dst, value); + exec_space_type, ViewType::Rank, int64_t>( + dst, value, exec_space_type()); } else { if (iterate == Kokkos::Iterate::Right) Kokkos::Impl::ViewFill(dst, value); + exec_space_type, ViewType::Rank, int>( + dst, value, exec_space_type()); else Kokkos::Impl::ViewFill(dst, value); + exec_space_type, ViewType::Rank, int>( + dst, value, exec_space_type()); } Kokkos::fence(); @@ -1542,7 +1618,8 @@ inline void deep_copy( typename ViewTraits::non_const_value_type& dst, const View& src, typename std::enable_if::specialize, void>::value>::type* = 0) { + typename ViewTraits::specialize, void>::value>::type* = + nullptr) { typedef ViewTraits src_traits; typedef typename src_traits::memory_space src_memory_space; @@ -1552,14 +1629,14 @@ inline void deep_copy( #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginDeepCopy( - Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace().name()), "Scalar", - &dst, Kokkos::Profiling::SpaceHandle(src_memory_space().name()), + Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace::name()), "Scalar", + &dst, Kokkos::Profiling::SpaceHandle(src_memory_space::name()), src.label(), src.data(), src.span() * sizeof(typename src_traits::value_type)); } #endif - if (src.data() == NULL) { + if (src.data() == nullptr) { Kokkos::fence(); #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { @@ -1587,7 +1664,8 @@ inline void deep_copy( std::is_same::specialize, void>::value && std::is_same::specialize, void>::value && (unsigned(ViewTraits::rank) == unsigned(0) && - unsigned(ViewTraits::rank) == unsigned(0)))>::type* = 0) { + unsigned(ViewTraits::rank) == unsigned(0)))>::type* = + nullptr) { typedef View dst_type; typedef View src_type; @@ -1602,14 +1680,14 @@ inline void deep_copy( #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginDeepCopy( - Kokkos::Profiling::SpaceHandle(dst_memory_space().name()), dst.label(), - dst.data(), Kokkos::Profiling::SpaceHandle(src_memory_space().name()), + Kokkos::Profiling::SpaceHandle(dst_memory_space::name()), dst.label(), + dst.data(), Kokkos::Profiling::SpaceHandle(src_memory_space::name()), src.label(), src.data(), src.span() * sizeof(typename dst_type::value_type)); } #endif - if (dst.data() == NULL && src.data() == NULL) { + if (dst.data() == nullptr && src.data() == nullptr) { Kokkos::fence(); #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { @@ -1643,7 +1721,7 @@ inline void deep_copy( std::is_same::specialize, void>::value && std::is_same::specialize, void>::value && (unsigned(ViewTraits::rank) != 0 || - unsigned(ViewTraits::rank) != 0))>::type* = 0) { + unsigned(ViewTraits::rank) != 0))>::type* = nullptr) { typedef View dst_type; typedef View src_type; typedef typename dst_type::execution_space dst_execution_space; @@ -1663,14 +1741,14 @@ inline void deep_copy( #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginDeepCopy( - Kokkos::Profiling::SpaceHandle(dst_memory_space().name()), dst.label(), - dst.data(), Kokkos::Profiling::SpaceHandle(src_memory_space().name()), + Kokkos::Profiling::SpaceHandle(dst_memory_space::name()), dst.label(), + dst.data(), Kokkos::Profiling::SpaceHandle(src_memory_space::name()), src.label(), src.data(), src.span() * sizeof(typename dst_type::value_type)); } #endif - if (dst.data() == NULL || src.data() == NULL) { + if (dst.data() == nullptr || src.data() == nullptr) { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // do nothing #else @@ -1874,7 +1952,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 1 && unsigned(ViewTraits::rank) == - 1)>::type* = 0) { + 1)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1893,7 +1971,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 2 && unsigned(ViewTraits::rank) == - 2)>::type* = 0) { + 2)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1921,7 +1999,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 3 && unsigned(ViewTraits::rank) == - 3)>::type* = 0) { + 3)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1951,7 +2029,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 4 && unsigned(ViewTraits::rank) == - 4)>::type* = 0) { + 4)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -1984,7 +2062,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 5 && unsigned(ViewTraits::rank) == - 5)>::type* = 0) { + 5)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2019,7 +2097,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 6 && unsigned(ViewTraits::rank) == - 6)>::type* = 0) { + 6)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2056,7 +2134,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 7 && unsigned(ViewTraits::rank) == - 7)>::type* = 0) { + 7)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2095,7 +2173,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 1 && unsigned(ViewTraits::rank) == - 1)>::type* = 0) { + 1)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2112,7 +2190,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 2 && unsigned(ViewTraits::rank) == - 2)>::type* = 0) { + 2)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2130,7 +2208,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 3 && unsigned(ViewTraits::rank) == - 3)>::type* = 0) { + 3)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2150,7 +2228,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 4 && unsigned(ViewTraits::rank) == - 4)>::type* = 0) { + 4)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2171,7 +2249,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 5 && unsigned(ViewTraits::rank) == - 5)>::type* = 0) { + 5)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2193,7 +2271,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 6 && unsigned(ViewTraits::rank) == - 6)>::type* = 0) { + 6)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2216,7 +2294,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, const View& src, typename std::enable_if<(unsigned(ViewTraits::rank) == 7 && unsigned(ViewTraits::rank) == - 7)>::type* = 0) { + 7)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2260,7 +2338,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 1)>::type* = 0) { + 1)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2278,7 +2356,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 2)>::type* = 0) { + 2)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2305,7 +2383,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 3)>::type* = 0) { + 3)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2334,7 +2412,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 4)>::type* = 0) { + 4)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2366,7 +2444,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 5)>::type* = 0) { + 5)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2400,7 +2478,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 6)>::type* = 0) { + 6)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2436,7 +2514,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const TeamType& team, const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 7)>::type* = 0) { + 7)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2475,7 +2553,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 1)>::type* = 0) { + 1)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2492,7 +2570,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 2)>::type* = 0) { + 2)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2510,7 +2588,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 3)>::type* = 0) { + 3)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2529,7 +2607,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 4)>::type* = 0) { + 4)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2550,7 +2628,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 5)>::type* = 0) { + 5)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2572,7 +2650,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 6)>::type* = 0) { + 6)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2595,7 +2673,7 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if<(unsigned(ViewTraits::rank) == - 7)>::type* = 0) { + 7)>::type* = nullptr) { if (dst.data() == nullptr) { return; } @@ -2621,33 +2699,88 @@ void KOKKOS_INLINE_FUNCTION local_deep_copy( namespace Kokkos { -/** \brief Deep copy a value from Host memory into a view. */ +/** \brief Deep copy a value from Host memory into a view. ExecSpace can access + * dst */ template inline void deep_copy( - const ExecSpace&, const View& dst, + const ExecSpace& space, const View& dst, typename ViewTraits::const_value_type& value, typename std::enable_if< Kokkos::Impl::is_execution_space::value && - std::is_same::specialize, - void>::value>::type* = 0) { + std::is_same::specialize, void>::value && + Kokkos::Impl::SpaceAccessibility< + ExecSpace, + typename ViewTraits::memory_space>::accessible>::type* = + nullptr) { typedef ViewTraits dst_traits; + static_assert(std::is_same::value, + "deep_copy requires non-const type"); +#if defined(KOKKOS_ENABLE_PROFILING) typedef typename dst_traits::memory_space dst_memory_space; + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginDeepCopy( + Kokkos::Profiling::SpaceHandle(dst_memory_space::name()), dst.label(), + dst.data(), Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace::name()), + "(none)", &value, dst.span() * sizeof(typename dst_traits::value_type)); + } +#endif + if (dst.data() == nullptr) { + space.fence(); + } else { + using ViewTypeUniform = typename std::conditional< + View::Rank == 0, + typename View::uniform_runtime_type, + typename View::uniform_runtime_nomemspace_type>::type; + Kokkos::Impl::ViewFill(dst, value, space); + } +#if defined(KOKKOS_ENABLE_PROFILING) + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endDeepCopy(); + } +#endif +} + +/** \brief Deep copy a value from Host memory into a view. ExecSpace can not + * access dst */ +template +inline void deep_copy( + const ExecSpace& space, const View& dst, + typename ViewTraits::const_value_type& value, + typename std::enable_if< + Kokkos::Impl::is_execution_space::value && + std::is_same::specialize, void>::value && + !Kokkos::Impl::SpaceAccessibility< + ExecSpace, + typename ViewTraits::memory_space>::accessible>::type* = + nullptr) { + typedef ViewTraits dst_traits; static_assert(std::is_same::value, "deep_copy requires non-const type"); #if defined(KOKKOS_ENABLE_PROFILING) + typedef typename dst_traits::memory_space dst_memory_space; if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginDeepCopy( - Kokkos::Profiling::SpaceHandle(dst_memory_space().name()), dst.label(), - dst.data(), Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace().name()), + Kokkos::Profiling::SpaceHandle(dst_memory_space::name()), dst.label(), + dst.data(), Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace::name()), "(none)", &value, dst.span() * sizeof(typename dst_traits::value_type)); } #endif - ExecSpace().fence(); - typedef - typename View::uniform_runtime_nomemspace_type ViewTypeUniform; - Kokkos::Impl::ViewFill(dst, value); - ExecSpace().fence(); + if (dst.data() == nullptr) { + space.fence(); + } else { + space.fence(); + using ViewTypeUniform = typename std::conditional< + View::Rank == 0, + typename View::uniform_runtime_type, + typename View::uniform_runtime_nomemspace_type>::type; + using fill_exec_space = typename dst_traits::memory_space::execution_space; + Kokkos::Impl::ViewFill(dst, value, fill_exec_space()); + fill_exec_space().fence(); + } #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endDeepCopy(); @@ -2672,13 +2805,13 @@ inline void deep_copy( #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginDeepCopy( - Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace().name()), "(none)", - &dst, Kokkos::Profiling::SpaceHandle(src_memory_space().name()), + Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace::name()), "(none)", + &dst, Kokkos::Profiling::SpaceHandle(src_memory_space::name()), src.label(), src.data(), sizeof(ST)); } #endif - if (src.data() == NULL) { + if (src.data() == nullptr) { exec_space.fence(); #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { @@ -2721,13 +2854,13 @@ inline void deep_copy( #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginDeepCopy( - Kokkos::Profiling::SpaceHandle(dst_memory_space().name()), dst.label(), - dst.data(), Kokkos::Profiling::SpaceHandle(src_memory_space().name()), + Kokkos::Profiling::SpaceHandle(dst_memory_space::name()), dst.label(), + dst.data(), Kokkos::Profiling::SpaceHandle(src_memory_space::name()), src.label(), src.data(), sizeof(DT)); } #endif - if (dst.data() == NULL && src.data() == NULL) { + if (dst.data() == nullptr && src.data() == nullptr) { exec_space.fence(); #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { @@ -2737,13 +2870,11 @@ inline void deep_copy( return; } - exec_space.fence(); if (dst.data() != src.data()) { Kokkos::Impl::DeepCopy( exec_space, dst.data(), src.data(), sizeof(typename dst_traits::value_type)); } - exec_space.fence(); #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endDeepCopy(); @@ -2764,7 +2895,7 @@ inline void deep_copy( std::is_same::specialize, void>::value && std::is_same::specialize, void>::value && (unsigned(ViewTraits::rank) != 0 || - unsigned(ViewTraits::rank) != 0))>::type* = 0) { + unsigned(ViewTraits::rank) != 0))>::type* = nullptr) { typedef View dst_type; typedef View src_type; @@ -2785,13 +2916,21 @@ inline void deep_copy( #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginDeepCopy( - Kokkos::Profiling::SpaceHandle(dst_memory_space().name()), dst.label(), - dst.data(), Kokkos::Profiling::SpaceHandle(src_memory_space().name()), + Kokkos::Profiling::SpaceHandle(dst_memory_space::name()), dst.label(), + dst.data(), Kokkos::Profiling::SpaceHandle(src_memory_space::name()), src.label(), src.data(), dst.span() * sizeof(dst_value_type)); } #endif - if (dst.data() == NULL || src.data() == NULL) { + dst_value_type* dst_start = dst.data(); + dst_value_type* dst_end = dst.data() + dst.span(); + src_value_type* src_start = src.data(); + src_value_type* src_end = src.data() + src.span(); + + // Early dropout if identical range + if ((dst_start == nullptr || src_start == nullptr) || + ((std::ptrdiff_t(dst_start) == std::ptrdiff_t(src_start)) && + (std::ptrdiff_t(dst_end) == std::ptrdiff_t(src_end)))) { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE // do nothing #else @@ -2823,7 +2962,6 @@ inline void deep_copy( Kokkos::Impl::throw_runtime_exception(message); } #endif - exec_space.fence(); #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endDeepCopy(); @@ -2851,11 +2989,7 @@ inline void deep_copy( dst_memory_space>::accessible }; - // Checking for Overlapping Views. - dst_value_type* dst_start = dst.data(); - dst_value_type* dst_end = dst.data() + dst.span(); - src_value_type* src_start = src.data(); - src_value_type* src_end = src.data() + src.span(); + // Error out for non-identical overlapping views. if ((((std::ptrdiff_t)dst_start < (std::ptrdiff_t)src_end) && ((std::ptrdiff_t)dst_end > (std::ptrdiff_t)src_start)) && ((dst.span_is_contiguous() && src.span_is_contiguous()))) { @@ -2881,23 +3015,27 @@ inline void deep_copy( (src.extent(4) != dst.extent(4)) || (src.extent(5) != dst.extent(5)) || (src.extent(6) != dst.extent(6)) || (src.extent(7) != dst.extent(7))) { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - exec_space.fence(); if (ExecCanAccessSrcDst) { Kokkos::Impl::ViewRemap(dst, src); + exec_space.fence(); } else if (DstExecCanAccessSrc) { // Copying data between views in accessible memory spaces and either // non-contiguous or incompatible shape. - Kokkos::Impl::ViewRemap(dst, src); + exec_space.fence(); + Kokkos::Impl::ViewRemap(dst, + src); + dst_execution_space().fence(); } else if (SrcExecCanAccessDst) { // Copying data between views in accessible memory spaces and either // non-contiguous or incompatible shape. + exec_space.fence(); Kokkos::Impl::ViewRemap(dst, src); + src_execution_space().fence(); } else { Kokkos::Impl::throw_runtime_exception( "deep_copy given views that would require a temporary allocation"); } - exec_space.fence(); #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endDeepCopy(); @@ -2946,16 +3084,26 @@ inline void deep_copy( ((dst_type::rank < 7) || (dst.stride_6() == src.stride_6())) && ((dst_type::rank < 8) || (dst.stride_7() == src.stride_7()))) { const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span(); - exec_space.fence(); if ((void*)dst.data() != (void*)src.data()) { Kokkos::Impl::DeepCopy( exec_space, dst.data(), src.data(), nbytes); } - exec_space.fence(); } else { - exec_space.fence(); - Impl::view_copy(dst, src); - exec_space.fence(); + // Copying data between views in accessible memory spaces and either + // non-contiguous or incompatible shape. + if (ExecCanAccessSrcDst) { + Impl::view_copy(exec_space, dst, src); + } else if (DstExecCanAccessSrc || SrcExecCanAccessDst) { + using cpy_exec_space = + typename std::conditional::type; + exec_space.fence(); + Impl::view_copy(cpy_exec_space(), dst, src); + cpy_exec_space().fence(); + } else { + Kokkos::Impl::throw_runtime_exception( + "deep_copy given views that would require a temporary allocation"); + } } #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { @@ -3285,7 +3433,7 @@ inline typename Kokkos::View::HostMirror create_mirror( typename std::enable_if< std::is_same::specialize, void>::value && !std::is_same::array_layout, - Kokkos::LayoutStride>::value>::type* = 0) { + Kokkos::LayoutStride>::value>::type* = nullptr) { typedef View src_type; typedef typename src_type::HostMirror dst_type; @@ -3321,7 +3469,7 @@ inline typename Kokkos::View::HostMirror create_mirror( typename std::enable_if< std::is_same::specialize, void>::value && std::is_same::array_layout, - Kokkos::LayoutStride>::value>::type* = 0) { + Kokkos::LayoutStride>::value>::type* = nullptr) { typedef View src_type; typedef typename src_type::HostMirror dst_type; @@ -3353,7 +3501,8 @@ template typename Impl::MirrorType::view_type create_mirror( const Space&, const Kokkos::View& src, typename std::enable_if::specialize, void>::value>::type* = 0) { + typename ViewTraits::specialize, void>::value>::type* = + nullptr) { return typename Impl::MirrorType::view_type(src.label(), src.layout()); } @@ -3367,7 +3516,7 @@ inline typename Kokkos::View::HostMirror create_mirror_view( typename Kokkos::View::HostMirror::memory_space>::value && std::is_same::data_type, typename Kokkos::View::HostMirror::data_type>:: - value)>::type* = 0) { + value)>::type* = nullptr) { return src; } @@ -3389,7 +3538,8 @@ template typename Impl::MirrorViewType::view_type create_mirror_view( const Space&, const Kokkos::View& src, typename std::enable_if< - Impl::MirrorViewType::is_same_memspace>::type* = 0) { + Impl::MirrorViewType::is_same_memspace>::type* = + nullptr) { return src; } @@ -3411,8 +3561,10 @@ create_mirror_view_and_copy( const Space&, const Kokkos::View& src, std::string const& name = "", typename std::enable_if< - Impl::MirrorViewType::is_same_memspace>::type* = 0) { + Impl::MirrorViewType::is_same_memspace>::type* = + nullptr) { (void)name; + fence(); // same behavior as deep_copy(src, src) return src; } @@ -3427,7 +3579,8 @@ create_mirror_view_and_copy( !Impl::MirrorViewType::is_same_memspace>::type* = 0) { using Mirror = typename Impl::MirrorViewType::view_type; std::string label = name.empty() ? src.label() : name; - auto mirror = Mirror(ViewAllocateWithoutInitializing(label), src.layout()); + auto mirror = typename Mirror::non_const_type{ + ViewAllocateWithoutInitializing(label), src.layout()}; deep_copy(mirror, src); return mirror; } @@ -3439,7 +3592,8 @@ typename Impl::MirrorViewType::view_type create_mirror_view( const Space&, const Kokkos::View& src, Kokkos::Impl::WithoutInitializing_t, typename std::enable_if< - Impl::MirrorViewType::is_same_memspace>::type* = 0) { + Impl::MirrorViewType::is_same_memspace>::type* = + nullptr) { return src; } diff --git a/core/src/Kokkos_Core.hpp b/core/src/Kokkos_Core.hpp index 7661efeca79..8392f0f3e56 100644 --- a/core/src/Kokkos_Core.hpp +++ b/core/src/Kokkos_Core.hpp @@ -58,13 +58,9 @@ #include #endif -//#if defined( KOKKOS_ENABLE_OPENMPTARGET ) +#if defined(KOKKOS_ENABLE_OPENMPTARGET) #include #include -//#endif - -#if defined(KOKKOS_ENABLE_QTHREADS) -#include #endif #if defined(KOKKOS_ENABLE_HPX) @@ -82,6 +78,9 @@ #if defined(KOKKOS_ENABLE_ROCM) #include #endif +#if defined(KOKKOS_ENABLE_HIP) +#include +#endif #include #include @@ -93,9 +92,7 @@ #include #include #include - #include - #include #include #include @@ -123,7 +120,15 @@ struct InitArguments { void initialize(int& narg, char* arg[]); -void initialize(const InitArguments& args = InitArguments()); +void initialize(InitArguments args = InitArguments()); + +namespace Impl { + +void pre_initialize(const InitArguments& args); + +void post_initialize(const InitArguments& args); + +} // namespace Impl bool is_initialized() noexcept; diff --git a/core/src/Kokkos_Core_fwd.hpp b/core/src/Kokkos_Core_fwd.hpp index 5b89dc51ca4..4828a957751 100644 --- a/core/src/Kokkos_Core_fwd.hpp +++ b/core/src/Kokkos_Core_fwd.hpp @@ -99,10 +99,6 @@ class HBWSpace; /// Memory space for hbw_malloc from memkind (e.g. for KNL class Serial; ///< Execution space main process on CPU. #endif -#if defined(KOKKOS_ENABLE_QTHREADS) -class Qthreads; ///< Execution space with Qthreads back-end. -#endif - #if defined(KOKKOS_ENABLE_HPX) namespace Experimental { class HPX; ///< Execution space with HPX back-end. @@ -124,13 +120,6 @@ class OpenMPTargetSpace; } // namespace Experimental #endif -#if defined(KOKKOS_ENABLE_CUDA) -class CudaSpace; ///< Memory space on Cuda GPU -class CudaUVMSpace; ///< Memory space on Cuda GPU with UVM -class CudaHostPinnedSpace; ///< Memory space on Host accessible to Cuda GPU -class Cuda; ///< Execution space for Cuda GPU -#endif - #if defined(KOKKOS_ENABLE_ROCM) namespace Experimental { class ROCmSpace; ///< Memory space on ROCm GPU @@ -138,11 +127,20 @@ class ROCm; ///< Execution space for ROCm GPU } // namespace Experimental #endif +#if defined(KOKKOS_ENABLE_HIP) +namespace Experimental { +class HIPSpace; ///< Memory space on HIP GPU +class HIP; ///< Execution space for HIP GPU +} // namespace Experimental +#endif + template struct Device; } // namespace Kokkos +#include "Cuda/Kokkos_Cuda_fwd.hpp" + //---------------------------------------------------------------------------- // Set the default execution space. @@ -157,44 +155,42 @@ namespace Kokkos { typedef Cuda DefaultExecutionSpace; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET) typedef Experimental::OpenMPTarget DefaultExecutionSpace; +#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HIP) +typedef Experimental::HIP DefaultExecutionSpace; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM) typedef Experimental::ROCm DefaultExecutionSpace; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP) typedef OpenMP DefaultExecutionSpace; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS) typedef Threads DefaultExecutionSpace; -//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) -// typedef Qthreads DefaultExecutionSpace; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX) typedef Kokkos::Experimental::HPX DefaultExecutionSpace; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL) typedef Serial DefaultExecutionSpace; #else #error \ - "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::Experimental::OpenMPTarget, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qthreads, or Kokkos::Serial." + "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::Experimental::HIP, Kokkos::Experimental::OpenMPTarget, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Experimental::HPX, or Kokkos::Serial." #endif #if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP) typedef OpenMP DefaultHostExecutionSpace; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS) typedef Threads DefaultHostExecutionSpace; -//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) -// typedef Qthreads DefaultHostExecutionSpace; +#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX) +typedef Kokkos::Experimental::HPX DefaultHostExecutionSpace; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL) typedef Serial DefaultHostExecutionSpace; #elif defined(KOKKOS_ENABLE_OPENMP) typedef OpenMP DefaultHostExecutionSpace; #elif defined(KOKKOS_ENABLE_THREADS) typedef Threads DefaultHostExecutionSpace; -//#elif defined( KOKKOS_ENABLE_QTHREADS ) -// typedef Qthreads DefaultHostExecutionSpace; #elif defined(KOKKOS_ENABLE_HPX) typedef Kokkos::Experimental::HPX DefaultHostExecutionSpace; #elif defined(KOKKOS_ENABLE_SERIAL) typedef Serial DefaultHostExecutionSpace; #else #error \ - "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qthreads, or Kokkos::Serial." + "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Threads, Kokkos::Experimental::HPX, or Kokkos::Serial." #endif } // namespace Kokkos @@ -213,6 +209,8 @@ namespace Impl { typedef Kokkos::CudaSpace ActiveExecutionMemorySpace; #elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_ROCM_GPU) typedef Kokkos::HostSpace ActiveExecutionMemorySpace; +#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HIP_GPU) +typedef Kokkos::Experimental::HIPSpace ActiveExecutionMemorySpace; #elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) typedef Kokkos::HostSpace ActiveExecutionMemorySpace; #else diff --git a/core/src/Kokkos_Crs.hpp b/core/src/Kokkos_Crs.hpp index f57863263b6..3725ba2604a 100644 --- a/core/src/Kokkos_Crs.hpp +++ b/core/src/Kokkos_Crs.hpp @@ -105,12 +105,12 @@ class Crs { /* * Default Constructors, operators and destructor */ - KOKKOS_FUNCTION Crs() = default; - KOKKOS_FUNCTION Crs(Crs const&) = default; - KOKKOS_FUNCTION Crs(Crs&&) = default; - KOKKOS_FUNCTION Crs& operator=(Crs const&) = default; - KOKKOS_FUNCTION Crs& operator=(Crs&&) = default; - KOKKOS_FUNCTION ~Crs() = default; + KOKKOS_DEFAULTED_FUNCTION Crs() = default; + KOKKOS_DEFAULTED_FUNCTION Crs(Crs const&) = default; + KOKKOS_DEFAULTED_FUNCTION Crs(Crs&&) = default; + KOKKOS_DEFAULTED_FUNCTION Crs& operator=(Crs const&) = default; + KOKKOS_DEFAULTED_FUNCTION Crs& operator=(Crs&&) = default; + KOKKOS_DEFAULTED_FUNCTION ~Crs() = default; /** \brief Assign to a view of the rhs array. * If the old view is the last view @@ -313,7 +313,7 @@ struct CountAndFillBase; template struct CountAndFillBase { - using data_type = typename CrsType::size_type; + using data_type = typename CrsType::data_type; using size_type = typename CrsType::size_type; using row_map_type = typename CrsType::row_map_type; using counts_type = row_map_type; @@ -343,7 +343,7 @@ struct CountAndFillBase { #if defined(KOKKOS_ENABLE_CUDA) template struct CountAndFillBase { - using data_type = typename CrsType::size_type; + using data_type = typename CrsType::data_type; using size_type = typename CrsType::size_type; using row_map_type = typename CrsType::row_map_type; using counts_type = row_map_type; diff --git a/core/src/Kokkos_Cuda.hpp b/core/src/Kokkos_Cuda.hpp index ad62ecf3835..ed51e95778f 100644 --- a/core/src/Kokkos_Cuda.hpp +++ b/core/src/Kokkos_Cuda.hpp @@ -199,8 +199,7 @@ class Cuda { //-------------------------------------------------- //! \name Cuda space instances - KOKKOS_INLINE_FUNCTION - ~Cuda() {} + ~Cuda() = default; Cuda(); @@ -258,6 +257,7 @@ class Cuda { cudaStream_t cuda_stream() const; int cuda_device() const; + const cudaDeviceProp& cuda_device_prop() const; //@} //-------------------------------------------------------------------------- @@ -267,11 +267,21 @@ class Cuda { inline Impl::CudaInternal* impl_internal_space_instance() const { return m_space_instance; } + uint32_t impl_instance_id() const noexcept { return 0; } private: Impl::CudaInternal* m_space_instance; }; +namespace Profiling { +namespace Experimental { +template <> +struct DeviceTypeTraits { + /// \brief An ID to differentiate (for example) Serial from OpenMP in Tooling + static constexpr DeviceType id = DeviceType::Cuda; +}; +} // namespace Experimental +} // namespace Profiling } // namespace Kokkos /*--------------------------------------------------------------------------*/ diff --git a/core/src/Kokkos_CudaSpace.hpp b/core/src/Kokkos_CudaSpace.hpp index a320aea2f8a..53e3b777864 100644 --- a/core/src/Kokkos_CudaSpace.hpp +++ b/core/src/Kokkos_CudaSpace.hpp @@ -130,7 +130,7 @@ int* atomic_lock_array_cuda_space_ptr(bool deallocate = false); /// global memory. /// /// Team and Thread private scratch allocations in -/// global memory are aquired via locks. +/// global memory are acquired via locks. /// This function retrieves the lock array pointer. /// If the array is not yet allocated it will do so. int* scratch_lock_array_cuda_space_ptr(bool deallocate = false); @@ -166,7 +166,7 @@ class CudaUVMSpace { /*--------------------------------*/ /** \brief CudaUVMSpace specific routine */ - static int number_of_allocations(); + KOKKOS_DEPRECATED static int number_of_allocations(); /*--------------------------------*/ @@ -401,6 +401,126 @@ struct DeepCopy { DeepCopy(const Cuda&, void* dst, const void* src, size_t); }; +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) { + (void)DeepCopy(instance, dst, src, n); + } +}; + template struct DeepCopy { inline DeepCopy(void* dst, const void* src, size_t n) { @@ -640,8 +760,8 @@ struct VerifyExecutionCanAccessMemorySpace struct VerifyExecutionCanAccessMemorySpace< - typename enable_if::value, - Kokkos::CudaSpace>::type, + typename std::enable_if::value, + Kokkos::CudaSpace>::type, OtherSpace> { enum { value = false }; KOKKOS_INLINE_FUNCTION static void verify(void) { diff --git a/core/src/Kokkos_ExecPolicy.hpp b/core/src/Kokkos_ExecPolicy.hpp index bf3a134b64b..11910138d33 100644 --- a/core/src/Kokkos_ExecPolicy.hpp +++ b/core/src/Kokkos_ExecPolicy.hpp @@ -193,6 +193,7 @@ class RangePolicy : public Impl::PolicyTraits { inline void set(const ChunkSize& chunksize, Args... args) { m_granularity = chunksize.value; m_granularity_mask = m_granularity - 1; + set(args...); } public: @@ -210,8 +211,8 @@ class RangePolicy : public Impl::PolicyTraits { private: /** \brief finalize chunk_size if it was set to AUTO*/ inline void set_auto_chunk_size() { - typename traits::index_type concurrency = - traits::execution_space::concurrency(); + int64_t concurrency = + static_cast(traits::execution_space::concurrency()); if (concurrency == 0) concurrency = 1; if (m_granularity > 0) { @@ -219,12 +220,14 @@ class RangePolicy : public Impl::PolicyTraits { Kokkos::abort("RangePolicy blocking granularity must be power of two"); } - member_type new_chunk_size = 1; - while (new_chunk_size * 100 * concurrency < m_end - m_begin) + int64_t new_chunk_size = 1; + while (new_chunk_size * 100 * concurrency < + static_cast(m_end - m_begin)) new_chunk_size *= 2; if (new_chunk_size < 128) { new_chunk_size = 1; - while ((new_chunk_size * 40 * concurrency < m_end - m_begin) && + while ((new_chunk_size * 40 * concurrency < + static_cast(m_end - m_begin)) && (new_chunk_size < 128)) new_chunk_size *= 2; } @@ -483,7 +486,6 @@ struct ScratchRequest { level = level_; per_team = 0; per_thread = thread_value.value; - ; } inline ScratchRequest(const int& level_, const Impl::PerTeamValue& team_value, @@ -491,7 +493,6 @@ struct ScratchRequest { level = level_; per_team = team_value.value; per_thread = thread_value.value; - ; } inline ScratchRequest(const int& level_, @@ -500,7 +501,6 @@ struct ScratchRequest { level = level_; per_team = team_value.value; per_thread = thread_value.value; - ; } }; diff --git a/core/src/Kokkos_Extents.hpp b/core/src/Kokkos_Extents.hpp index 2e07e8b76b6..856adf9cf97 100644 --- a/core/src/Kokkos_Extents.hpp +++ b/core/src/Kokkos_Extents.hpp @@ -97,36 +97,39 @@ struct _parse_impl { // backwards template struct _parse_impl< - T*, Experimental::Extents, + T*, Kokkos::Experimental::Extents, typename std::enable_if<_all_remaining_extents_dynamic::value>::type> - : _parse_impl> {}; + : _parse_impl> { +}; // int*(*[x])[y] should still work also (meaning int[][x][][y]) template -struct _parse_impl, - typename std::enable_if< - not _all_remaining_extents_dynamic::value>::type> { +struct _parse_impl< + T*, Kokkos::Experimental::Extents, + typename std::enable_if::value>::type> { using _next = Kokkos::Experimental::AppendExtent< - typename _parse_impl, void>::type, - Experimental::dynamic_extent>; + typename _parse_impl, + void>::type, + Kokkos::Experimental::dynamic_extent>; using type = typename _next::type; }; template -struct _parse_impl, void> - : _parse_impl, void> + : _parse_impl< + T, Kokkos::Experimental::Extents // TODO @pedantic this // could be a // narrowing cast - > {}; + > {}; } // end namespace _parse_view_extents_impl template struct ParseViewExtents { using type = typename _parse_view_extents_impl ::_parse_impl< - DataType, Experimental::Extents<>>::type; + DataType, Kokkos::Experimental::Extents<>>::type; }; template @@ -135,7 +138,7 @@ struct ApplyExtent { }; template -struct ApplyExtent { +struct ApplyExtent { using type = ValueType*; }; @@ -150,15 +153,17 @@ struct ApplyExtent { }; template -struct ApplyExtent { +struct ApplyExtent { using type = - typename ApplyExtent::type*; + typename ApplyExtent::type*; }; template -struct ApplyExtent { +struct ApplyExtent { using type = - typename ApplyExtent::type[N]; + typename ApplyExtent::type[N]; }; } // end namespace Impl diff --git a/core/src/Kokkos_Future.hpp b/core/src/Kokkos_Future.hpp index 15a5d39aad9..1995e2609a8 100644 --- a/core/src/Kokkos_Future.hpp +++ b/core/src/Kokkos_Future.hpp @@ -296,7 +296,8 @@ class BasicFuture { task_base* m_task; - KOKKOS_INLINE_FUNCTION explicit BasicFuture(task_base* task) : m_task(0) { + KOKKOS_INLINE_FUNCTION explicit BasicFuture(task_base* task) + : m_task(nullptr) { if (task) queue_type::assign(&m_task, task); } @@ -306,7 +307,7 @@ class BasicFuture { //---------------------------------------- KOKKOS_INLINE_FUNCTION - bool is_null() const { return 0 == m_task; } + bool is_null() const { return nullptr == m_task; } KOKKOS_INLINE_FUNCTION int reference_count() const { @@ -317,7 +318,7 @@ class BasicFuture { KOKKOS_INLINE_FUNCTION void clear() { - if (m_task) queue_type::assign(&m_task, (task_base*)0); + if (m_task) queue_type::assign(&m_task, nullptr); } //---------------------------------------- @@ -332,11 +333,11 @@ class BasicFuture { KOKKOS_INLINE_FUNCTION BasicFuture(BasicFuture&& rhs) noexcept : m_task(rhs.m_task) { - rhs.m_task = 0; + rhs.m_task = nullptr; } KOKKOS_INLINE_FUNCTION - BasicFuture(const BasicFuture& rhs) : m_task(0) { + BasicFuture(const BasicFuture& rhs) : m_task(nullptr) { if (rhs.m_task) queue_type::assign(&m_task, rhs.m_task); } @@ -344,7 +345,7 @@ class BasicFuture { BasicFuture& operator=(BasicFuture&& rhs) noexcept { clear(); m_task = rhs.m_task; - rhs.m_task = 0; + rhs.m_task = nullptr; return *this; } @@ -420,13 +421,13 @@ class BasicFuture { KOKKOS_INLINE_FUNCTION int is_ready() const noexcept { - return (0 == m_task) || + return (nullptr == m_task) || (((task_base*)task_base::LockTag) == m_task->m_wait); } KOKKOS_INLINE_FUNCTION const typename Impl::TaskResult::reference_type get() const { - if (0 == m_task) { + if (nullptr == m_task) { Kokkos::abort("Kokkos:::Future::get ERROR: is_null()"); } return Impl::TaskResult::get(m_task); diff --git a/core/src/Kokkos_HBWSpace.hpp b/core/src/Kokkos_HBWSpace.hpp index 357dcc90144..ce36b018cf4 100644 --- a/core/src/Kokkos_HBWSpace.hpp +++ b/core/src/Kokkos_HBWSpace.hpp @@ -64,10 +64,10 @@ namespace Impl { /// This function initializes the locks to zero (unset). void init_lock_array_hbw_space(); -/// \brief Aquire a lock for the address +/// \brief Acquire a lock for the address /// -/// This function tries to aquire the lock for the hash value derived -/// from the provided ptr. If the lock is successfully aquired the +/// This function tries to acquire the lock for the hash value derived +/// from the provided ptr. If the lock is successfully acquired the /// function returns true. Otherwise it returns false. bool lock_address_hbw_space(void* ptr); @@ -75,7 +75,7 @@ bool lock_address_hbw_space(void* ptr); /// /// This function releases the lock for the hash value derived /// from the provided ptr. This function should only be called -/// after previously successfully aquiring a lock with +/// after previously successfully acquiring a lock with /// lock_address. void unlock_address_hbw_space(void* ptr); @@ -110,19 +110,15 @@ class HBWSpace { typedef Kokkos::OpenMP execution_space; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS) typedef Kokkos::Threads execution_space; -//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) -// typedef Kokkos::Qthreads execution_space; #elif defined(KOKKOS_ENABLE_OPENMP) typedef Kokkos::OpenMP execution_space; #elif defined(KOKKOS_ENABLE_THREADS) typedef Kokkos::Threads execution_space; -//#elif defined( KOKKOS_ENABLE_QTHREADS ) -// typedef Kokkos::Qthreads execution_space; #elif defined(KOKKOS_ENABLE_SERIAL) typedef Kokkos::Serial execution_space; #else #error \ - "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qhreads, or Kokkos::Serial. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." + "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Threads, or Kokkos::Serial. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." #endif //! This memory space preferred device_type @@ -192,7 +188,12 @@ class SharedAllocationRecord const Kokkos::Experimental::HBWSpace m_space; protected: - ~SharedAllocationRecord(); + ~SharedAllocationRecord() +#if defined( \ + KOKKOS_IMPL_INTEL_WORKAROUND_NOEXCEPT_SPECIFICATION_VIRTUAL_FUNCTION) + noexcept +#endif + ; SharedAllocationRecord() = default; SharedAllocationRecord( diff --git a/core/src/Kokkos_HIP.hpp b/core/src/Kokkos_HIP.hpp new file mode 100644 index 00000000000..4e9325c2d28 --- /dev/null +++ b/core/src/Kokkos_HIP.hpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIP_HPP +#define KOKKOS_HIP_HPP + +#include + +#if defined(KOKKOS_ENABLE_HIP) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#include +#include +#include + +#include +#include +#include +#include + +#endif +#endif diff --git a/core/src/Kokkos_HIP_Space.hpp b/core/src/Kokkos_HIP_Space.hpp new file mode 100644 index 00000000000..90bdb7b9130 --- /dev/null +++ b/core/src/Kokkos_HIP_Space.hpp @@ -0,0 +1,758 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HIPSPACE_HPP +#define KOKKOS_HIPSPACE_HPP + +#include + +#if defined(KOKKOS_ENABLE_HIP) + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Experimental { +/** \brief HIP on-device memory management */ + +class HIPSpace { + public: + //! Tag this class as a kokkos memory space + using memory_space = HIPSpace; + using execution_space = Kokkos::Experimental::HIP; + using device_type = Kokkos::Device; + + using size_type = unsigned int; + + /*--------------------------------*/ + + HIPSpace(); + HIPSpace(HIPSpace&& rhs) = default; + HIPSpace(const HIPSpace& rhs) = default; + HIPSpace& operator=(HIPSpace&& rhs) = default; + HIPSpace& operator=(const HIPSpace& rhs) = default; + ~HIPSpace() = default; + + /**\brief Allocate untracked memory in the hip space */ + void* allocate(const size_t arg_alloc_size) const; + + /**\brief Deallocate untracked memory in the hip space */ + void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const; + + /**\brief Return Name of the MemorySpace */ + static constexpr const char* name() { return "HIP"; } + + /*--------------------------------*/ + /** \brief Error reporting for HostSpace attempt to access HIPSpace */ + static void access_error(); + static void access_error(const void* const); + + private: + int m_device; ///< Which HIP device + + friend class Kokkos::Impl::SharedAllocationRecord< + Kokkos::Experimental::HIPSpace, void>; +}; + +} // namespace Experimental + +namespace Impl { + +/// \brief Initialize lock array for arbitrary size atomics. +/// +/// Arbitrary atomics are implemented using a hash table of locks +/// where the hash value is derived from the address of the +/// object for which an atomic operation is performed. +/// This function initializes the locks to zero (unset). +void init_lock_arrays_hip_space(); + +/// \brief Retrieve the pointer to the lock array for arbitrary size atomics. +/// +/// Arbitrary atomics are implemented using a hash table of locks +/// where the hash value is derived from the address of the +/// object for which an atomic operation is performed. +/// This function retrieves the lock array pointer. +/// If the array is not yet allocated it will do so. +int* atomic_lock_array_hip_space_ptr(bool deallocate = false); + +/// \brief Retrieve the pointer to the scratch array for team and thread private +/// global memory. +/// +/// Team and Thread private scratch allocations in +/// global memory are acquired via locks. +/// This function retrieves the lock array pointer. +/// If the array is not yet allocated it will do so. +int* scratch_lock_array_hip_space_ptr(bool deallocate = false); + +/// \brief Retrieve the pointer to the scratch array for unique identifiers. +/// +/// Unique identifiers in the range 0-HIP::concurrency +/// are provided via locks. +/// This function retrieves the lock array pointer. +/// If the array is not yet allocated it will do so. +int* threadid_lock_array_hip_space_ptr(bool deallocate = false); +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Experimental { +/** \brief Host memory that is accessible to HIP execution space + * through HIP's host-pinned memory allocation. + */ +class HIPHostPinnedSpace { + public: + //! Tag this class as a kokkos memory space + /** \brief Memory is in HostSpace so use the HostSpace::execution_space */ + using execution_space = HostSpace::execution_space; + using memory_space = HIPHostPinnedSpace; + using device_type = Kokkos::Device; + using size_type = unsigned int; + + /*--------------------------------*/ + + HIPHostPinnedSpace(); + HIPHostPinnedSpace(HIPHostPinnedSpace&& rhs) = default; + HIPHostPinnedSpace(const HIPHostPinnedSpace& rhs) = default; + HIPHostPinnedSpace& operator=(HIPHostPinnedSpace&& rhs) = default; + HIPHostPinnedSpace& operator=(const HIPHostPinnedSpace& rhs) = default; + ~HIPHostPinnedSpace() = default; + + /**\brief Allocate untracked memory in the space */ + void* allocate(const size_t arg_alloc_size) const; + + /**\brief Deallocate untracked memory in the space */ + void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const; + + /**\brief Return Name of the MemorySpace */ + static constexpr const char* name() { return "HIPHostPinned"; } + + /*--------------------------------*/ +}; +} // namespace Experimental +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +static_assert( + Kokkos::Impl::MemorySpaceAccess::assignable, + ""); + +//---------------------------------------- + +template <> +struct MemorySpaceAccess { + enum { assignable = false }; + enum { accessible = false }; + enum { deepcopy = true }; +}; + +template <> +struct MemorySpaceAccess { + // HostSpace::execution_space == HIPHostPinnedSpace::execution_space + enum { assignable = true }; + enum { accessible = true }; + enum { deepcopy = true }; +}; + +//---------------------------------------- + +template <> +struct MemorySpaceAccess { + enum { assignable = false }; + enum { accessible = false }; + enum { deepcopy = true }; +}; + +template <> +struct MemorySpaceAccess { + // HIPSpace::execution_space != HIPHostPinnedSpace::execution_space + enum { assignable = false }; + enum { accessible = true }; // HIPSpace::execution_space + enum { deepcopy = true }; +}; + +//---------------------------------------- +// HIPHostPinnedSpace::execution_space == HostSpace::execution_space +// HIPHostPinnedSpace accessible to both HIP and Host + +template <> +struct MemorySpaceAccess { + enum { assignable = false }; // Cannot access from HIP + enum { accessible = true }; // HIPHostPinnedSpace::execution_space + enum { deepcopy = true }; +}; + +template <> +struct MemorySpaceAccess { + enum { assignable = false }; // Cannot access from Host + enum { accessible = false }; + enum { deepcopy = true }; +}; + +}; // namespace Impl +//---------------------------------------- + +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +void DeepCopyAsyncHIP(void* dst, const void* src, size_t n); + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t); + DeepCopy(const Kokkos::Experimental::HIP&, void* dst, const void* src, + size_t); +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t); + DeepCopy(const Kokkos::Experimental::HIP&, void* dst, const void* src, + size_t); +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t); + DeepCopy(const Kokkos::Experimental::HIP&, void* dst, const void* src, + size_t); +}; + +template +struct DeepCopy { + inline DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy( + dst, src, n); + } + + inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, + size_t n) { + exec.fence(); + DeepCopyAsyncHIP(dst, src, n); + } +}; + +template +struct DeepCopy { + inline DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + + inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, + size_t n) { + exec.fence(); + DeepCopyAsyncHIP(dst, src, n); + } +}; + +template +struct DeepCopy { + inline DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + + inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, + size_t n) { + exec.fence(); + DeepCopyAsyncHIP(dst, src, n); + } +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t); + DeepCopy(const Kokkos::Experimental::HIP&, void* dst, const void* src, + size_t); +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t); + DeepCopy(const Kokkos::Experimental::HIP&, void* dst, const void* src, + size_t); +}; + +template <> +struct DeepCopy { + DeepCopy(void* dst, const void* src, size_t); + DeepCopy(const Kokkos::Experimental::HIP&, void* dst, const void* src, + size_t); +}; + +template +struct DeepCopy { + inline DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + + inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, + size_t n) { + exec.fence(); + DeepCopyAsyncHIP(dst, src, n); + } +}; + +template +struct DeepCopy { + inline DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + + inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, + size_t n) { + exec.fence(); + DeepCopyAsyncHIP(dst, src, n); + } +}; + +template +struct DeepCopy { + inline DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + + inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, + size_t n) { + exec.fence(); + DeepCopyAsyncHIP(dst, src, n); + } +}; + +template +struct DeepCopy { + inline DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + + inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, + size_t n) { + exec.fence(); + DeepCopyAsyncHIP(dst, src, n); + } +}; + +template +struct DeepCopy { + inline DeepCopy(void* dst, const void* src, size_t n) { + (void)DeepCopy(dst, src, n); + } + + inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src, + size_t n) { + exec.fence(); + DeepCopyAsyncHIP(dst, src, n); + } +}; +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/** Running in HIPSpace attempting to access HostSpace: error */ +template <> +struct VerifyExecutionCanAccessMemorySpace { + enum { value = false }; + KOKKOS_INLINE_FUNCTION static void verify(void) { + Kokkos::abort("HIP code attempted to access HostSpace memory"); + } + + KOKKOS_INLINE_FUNCTION static void verify(const void*) { + Kokkos::abort("HIP code attempted to access HostSpace memory"); + } +}; + +/** Running in HIPSpace accessing HIPHostPinnedSpace: ok */ +template <> +struct VerifyExecutionCanAccessMemorySpace< + Kokkos::Experimental::HIPSpace, Kokkos::Experimental::HIPHostPinnedSpace> { + enum { value = true }; + KOKKOS_INLINE_FUNCTION static void verify(void) {} + KOKKOS_INLINE_FUNCTION static void verify(const void*) {} +}; + +/** Running in HIPSpace attempting to access an unknown space: error */ +template +struct VerifyExecutionCanAccessMemorySpace< + typename std::enable_if< + !std::is_same::value, + Kokkos::Experimental::HIPSpace>::type, + OtherSpace> { + enum { value = false }; + KOKKOS_INLINE_FUNCTION static void verify(void) { + Kokkos::abort("HIP code attempted to access unknown Space memory"); + } + + KOKKOS_INLINE_FUNCTION static void verify(const void*) { + Kokkos::abort("HIP code attempted to access unknown Space memory"); + } +}; + +//---------------------------------------------------------------------------- +/** Running in HostSpace attempting to access HIPSpace */ +template <> +struct VerifyExecutionCanAccessMemorySpace { + enum { value = false }; + inline static void verify(void) { + Kokkos::Experimental::HIPSpace::access_error(); + } + inline static void verify(const void* p) { + Kokkos::Experimental::HIPSpace::access_error(p); + } +}; + +/** Running in HostSpace accessing HIPHostPinnedSpace is OK */ +template <> +struct VerifyExecutionCanAccessMemorySpace< + Kokkos::HostSpace, Kokkos::Experimental::HIPHostPinnedSpace> { + enum { value = true }; + KOKKOS_INLINE_FUNCTION static void verify(void) {} + KOKKOS_INLINE_FUNCTION static void verify(const void*) {} +}; +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template <> +class SharedAllocationRecord + : public SharedAllocationRecord { + private: + typedef SharedAllocationRecord RecordBase; + + SharedAllocationRecord(const SharedAllocationRecord&) = delete; + SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; + + static void deallocate(RecordBase*); + +#ifdef KOKKOS_DEBUG + static RecordBase s_root_record; +#endif + + const Kokkos::Experimental::HIPSpace m_space; + + protected: + ~SharedAllocationRecord(); + + SharedAllocationRecord( + const Kokkos::Experimental::HIPSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &deallocate); + + public: + std::string get_label() const; + + static SharedAllocationRecord* allocate( + const Kokkos::Experimental::HIPSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size); + + /**\brief Allocate tracked memory in the space */ + static void* allocate_tracked(const Kokkos::Experimental::HIPSpace& arg_space, + const std::string& arg_label, + const size_t arg_alloc_size); + + /**\brief Reallocate tracked memory in the space */ + static void* reallocate_tracked(void* const arg_alloc_ptr, + const size_t arg_alloc_size); + + /**\brief Deallocate tracked memory in the space */ + static void deallocate_tracked(void* const arg_alloc_ptr); + + static SharedAllocationRecord* get_record(void* arg_alloc_ptr); + + static void print_records(std::ostream&, + const Kokkos::Experimental::HIPSpace&, + bool detail = false); +}; + +template <> +class SharedAllocationRecord + : public SharedAllocationRecord { + private: + typedef SharedAllocationRecord RecordBase; + + SharedAllocationRecord(const SharedAllocationRecord&) = delete; + SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; + + static void deallocate(RecordBase*); + +#ifdef KOKKOS_DEBUG + static RecordBase s_root_record; +#endif + + const Kokkos::Experimental::HIPHostPinnedSpace m_space; + + protected: + ~SharedAllocationRecord(); + SharedAllocationRecord() : RecordBase(), m_space() {} + + SharedAllocationRecord( + const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &deallocate); + + public: + std::string get_label() const; + + static SharedAllocationRecord* allocate( + const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size); + /**\brief Allocate tracked memory in the space */ + static void* allocate_tracked( + const Kokkos::Experimental::HIPHostPinnedSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size); + + /**\brief Reallocate tracked memory in the space */ + static void* reallocate_tracked(void* const arg_alloc_ptr, + const size_t arg_alloc_size); + + /**\brief Deallocate tracked memory in the space */ + static void deallocate_tracked(void* const arg_alloc_ptr); + + static SharedAllocationRecord* get_record(void* arg_alloc_ptr); + + static void print_records(std::ostream&, + const Kokkos::Experimental::HIPHostPinnedSpace&, + bool detail = false); +}; +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +namespace Impl { +class HIPInternal; +} +/// \class HIP +/// \brief Kokkos device for multicore processors in the host memory space. +class HIP { + public: + //------------------------------------ + //! \name Type declarations that all Kokkos devices must provide. + //@{ + + //! Tag this class as a kokkos execution space + using execution_space = HIP; + using memory_space = HIPSpace; + using device_type = Kokkos::Device; + + using array_layout = LayoutLeft; + using size_type = HIPSpace::size_type; + + using scratch_memory_space = ScratchMemorySpace; + + ~HIP() = default; + HIP(); + // explicit HIP( const int instance_id ); + + HIP(HIP&&) = default; + HIP(const HIP&) = default; + HIP& operator=(HIP&&) = default; + HIP& operator=(const HIP&) = default; + + //@} + //------------------------------------ + //! \name Functions that all Kokkos devices must implement. + //@{ + + KOKKOS_INLINE_FUNCTION static int in_parallel() { +#if defined(__HIP_ARCH__) + return true; +#else + return false; +#endif + } + + /** \brief Wait until all dispatched functors complete. A noop for OpenMP. */ + static void impl_static_fence(); + void fence() const; + + /// \brief Print configuration information to the given output stream. + static void print_configuration(std::ostream&, const bool detail = false); + + /// \brief Free any resources being consumed by the device. + static void impl_finalize(); + + /** \brief Initialize the device. + * + */ + struct SelectDevice { + int hip_device_id; + SelectDevice() : hip_device_id(0) {} + explicit SelectDevice(int id) : hip_device_id(id) {} + }; + + int hip_device() const; + + static void impl_initialize(const SelectDevice = SelectDevice()); + + static int impl_is_initialized(); + + // static size_type device_arch(); + + // static size_type detect_device_count(); + + static int concurrency(); + static const char* name(); + + inline Impl::HIPInternal* impl_internal_space_instance() const { + return m_space_instance; + } + + uint32_t impl_instance_id() const noexcept { return 0; } + + private: + Impl::HIPInternal* m_space_instance; +}; +} // namespace Experimental +namespace Profiling { +namespace Experimental { +template <> +struct DeviceTypeTraits { + static constexpr DeviceType id = DeviceType::HIP; +}; +} // namespace Experimental +} // namespace Profiling +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { + +template <> +struct MemorySpaceAccess { + enum { assignable = false }; + enum { accessible = true }; + enum { deepcopy = false }; +}; + +template <> +struct VerifyExecutionCanAccessMemorySpace< + Kokkos::Experimental::HIP::memory_space, + Kokkos::Experimental::HIP::scratch_memory_space> { + enum { value = true }; + KOKKOS_INLINE_FUNCTION static void verify(void) {} + KOKKOS_INLINE_FUNCTION static void verify(const void*) {} +}; + +template <> +struct VerifyExecutionCanAccessMemorySpace< + Kokkos::HostSpace, Kokkos::Experimental::HIP::scratch_memory_space> { + enum { value = false }; + inline static void verify(void) { + Kokkos::Experimental::HIPSpace::access_error(); + } + inline static void verify(const void* p) { + Kokkos::Experimental::HIPSpace::access_error(p); + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif /* #if defined( KOKKOS_ENABLE_HIP ) */ +#endif /* #define KOKKOS_HIPSPACE_HPP */ diff --git a/core/src/Kokkos_HPX.hpp b/core/src/Kokkos_HPX.hpp index 46101c824f2..10354635c58 100644 --- a/core/src/Kokkos_HPX.hpp +++ b/core/src/Kokkos_HPX.hpp @@ -83,6 +83,7 @@ #include #include #include +#include #include #include @@ -194,6 +195,7 @@ class HPX { const bool /* verbose */ = false) { std::cout << "HPX backend" << std::endl; } + uint32_t impl_instance_id() const noexcept { return 0; } static bool in_parallel(HPX const & = HPX()) noexcept { return false; } static void impl_static_fence(HPX const & = HPX()) @@ -228,8 +230,8 @@ class HPX { } template - static void partition_master(F const &f, int requested_num_partitions = 0, - int requested_partition_size = 0) { + static void partition_master(F const &, int requested_num_partitions = 0, + int = 0) { if (requested_num_partitions > 1) { Kokkos::abort( "Kokkos::Experimental::HPX::partition_master: can't partition an " @@ -297,6 +299,15 @@ class HPX { }; } // namespace Experimental +namespace Profiling { +namespace Experimental { +template <> +struct DeviceTypeTraits { + constexpr static DeviceType id = DeviceType::HPX; +}; +} // namespace Experimental +} // namespace Profiling + namespace Impl { template inline void dispatch_execute_task(Closure *closure) { @@ -462,7 +473,7 @@ struct HPXTeamMember { template KOKKOS_INLINE_FUNCTION typename std::enable_if::value>::type - team_reduce(const ReducerType &reducer) const {} + team_reduce(const ReducerType &) const {} template KOKKOS_INLINE_FUNCTION Type @@ -590,6 +601,11 @@ class TeamPolicyInternal template friend class TeamPolicyInternal; + const typename traits::execution_space &space() const { + static typename traits::execution_space m_space; + return m_space; + } + template TeamPolicyInternal(const TeamPolicyInternal &p) { @@ -612,8 +628,7 @@ class TeamPolicyInternal } TeamPolicyInternal(const typename traits::execution_space &, - int league_size_request, - const Kokkos::AUTO_t &team_size_request, + int league_size_request, const Kokkos::AUTO_t &, int /* vector_length_request */ = 1) : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, @@ -629,8 +644,7 @@ class TeamPolicyInternal init(league_size_request, team_size_request); } - TeamPolicyInternal(int league_size_request, - const Kokkos::AUTO_t &team_size_request, + TeamPolicyInternal(int league_size_request, const Kokkos::AUTO_t &, int /* vector_length_request */ = 1) : m_team_scratch_size{0, 0}, m_thread_scratch_size{0, 0}, @@ -1169,7 +1183,7 @@ class ParallelReduce, ReducerType, const ViewType &arg_view, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void *>::type = NULL) + void *>::type = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), @@ -1359,7 +1373,7 @@ class ParallelReduce, ReducerType, const ViewType &arg_view, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void *>::type = NULL) + void *>::type = nullptr) : m_functor(arg_functor), m_mdr_policy(arg_policy), m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), @@ -1990,7 +2004,7 @@ class ParallelReduce, const ViewType &arg_result, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void *>::type = NULL) + void *>::type = nullptr) : m_functor(arg_functor), m_league(arg_policy.league_size()), m_policy(arg_policy), @@ -2241,28 +2255,28 @@ KOKKOS_INLINE_FUNCTION void parallel_scan( template KOKKOS_INLINE_FUNCTION void single( - const Impl::VectorSingleStruct &single_struct, + const Impl::VectorSingleStruct &, const FunctorType &lambda) { lambda(); } template KOKKOS_INLINE_FUNCTION void single( - const Impl::ThreadSingleStruct &single_struct, + const Impl::ThreadSingleStruct &, const FunctorType &lambda) { lambda(); } template KOKKOS_INLINE_FUNCTION void single( - const Impl::VectorSingleStruct &single_struct, + const Impl::VectorSingleStruct &, const FunctorType &lambda, ValueType &val) { lambda(val); } template KOKKOS_INLINE_FUNCTION void single( - const Impl::ThreadSingleStruct &single_struct, + const Impl::ThreadSingleStruct &, const FunctorType &lambda, ValueType &val) { lambda(val); } diff --git a/core/src/Kokkos_HostSpace.hpp b/core/src/Kokkos_HostSpace.hpp index 974ca1e5ef6..5bc50c7ff08 100644 --- a/core/src/Kokkos_HostSpace.hpp +++ b/core/src/Kokkos_HostSpace.hpp @@ -74,10 +74,10 @@ namespace Impl { /// This function initializes the locks to zero (unset). void init_lock_array_host_space(); -/// \brief Aquire a lock for the address +/// \brief Acquire a lock for the address /// -/// This function tries to aquire the lock for the hash value derived -/// from the provided ptr. If the lock is successfully aquired the +/// This function tries to acquire the lock for the hash value derived +/// from the provided ptr. If the lock is successfully acquired the /// function returns true. Otherwise it returns false. bool lock_address_host_space(void* ptr); @@ -85,7 +85,7 @@ bool lock_address_host_space(void* ptr); /// /// This function releases the lock for the hash value derived /// from the provided ptr. This function should only be called -/// after previously successfully aquiring a lock with +/// after previously successfully acquiring a lock with /// lock_address. void unlock_address_host_space(void* ptr); @@ -118,21 +118,17 @@ class HostSpace { typedef Kokkos::Threads execution_space; #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX) typedef Kokkos::Experimental::HPX execution_space; -//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) -// typedef Kokkos::Qthreads execution_space; #elif defined(KOKKOS_ENABLE_OPENMP) typedef Kokkos::OpenMP execution_space; #elif defined(KOKKOS_ENABLE_THREADS) typedef Kokkos::Threads execution_space; -//#elif defined( KOKKOS_ENABLE_QTHREADS ) -// typedef Kokkos::Qthreads execution_space; #elif defined(KOKKOS_ENABLE_HPX) typedef Kokkos::Experimental::HPX execution_space; #elif defined(KOKKOS_ENABLE_SERIAL) typedef Kokkos::Serial execution_space; #else #error \ - "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qthreads, or Kokkos::Serial. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." + "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Threads, or Kokkos::Serial. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." #endif //! This memory space preferred device_type @@ -248,7 +244,12 @@ class SharedAllocationRecord const Kokkos::HostSpace m_space; protected: - ~SharedAllocationRecord(); + ~SharedAllocationRecord() +#if defined( \ + KOKKOS_IMPL_INTEL_WORKAROUND_NOEXCEPT_SPECIFICATION_VIRTUAL_FUNCTION) + noexcept +#endif + ; SharedAllocationRecord() = default; SharedAllocationRecord( @@ -267,6 +268,9 @@ class SharedAllocationRecord #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); #else + (void)arg_space; + (void)arg_label; + (void)arg_alloc_size; return (SharedAllocationRecord*)0; #endif } diff --git a/core/src/Kokkos_Macros.hpp b/core/src/Kokkos_Macros.hpp index 5649d12e97f..00a07bdcf9d 100644 --- a/core/src/Kokkos_Macros.hpp +++ b/core/src/Kokkos_Macros.hpp @@ -50,7 +50,6 @@ * * KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces * KOKKOS_ENABLE_THREADS Kokkos::Threads execution space - * KOKKOS_ENABLE_QTHREADS Kokkos::Qthreads execution space * KOKKOS_ENABLE_HPX Kokkos::Experimental::HPX execution space * KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space * KOKKOS_ENABLE_OPENMPTARGET Kokkos::Experimental::OpenMPTarget @@ -80,6 +79,7 @@ * KOKKOS_COMPILER_APPLECC * KOKKOS_COMPILER_CLANG * KOKKOS_COMPILER_PGI + * KOKKOS_COMPILER_MSVC * * Macros for which compiler extension to use for atomics on intrinsice types * @@ -99,17 +99,17 @@ //---------------------------------------------------------------------------- -#if defined(KOKKOS_ENABLE_SERIAL) || defined(KOKKOS_ENABLE_THREADS) || \ - defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_QTHREADS) || \ - defined(KOKKOS_ENABLE_HPX) || defined(KOKKOS_ENABLE_ROCM) || \ - defined(KOKKOS_ENABLE_OPENMPTARGET) +#if defined(KOKKOS_ENABLE_SERIAL) || defined(KOKKOS_ENABLE_THREADS) || \ + defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_HPX) || \ + defined(KOKKOS_ENABLE_ROCM) || defined(KOKKOS_ENABLE_OPENMPTARGET) || \ + defined(KOKKOS_ENABLE_HIP) #define KOKKOS_INTERNAL_ENABLE_NON_CUDA_BACKEND #endif -#if !defined(KOKKOS_ENABLE_THREADS) && !defined(KOKKOS_ENABLE_CUDA) && \ - !defined(KOKKOS_ENABLE_OPENMP) && !defined(KOKKOS_ENABLE_QTHREADS) && \ - !defined(KOKKOS_ENABLE_HPX) && !defined(KOKKOS_ENABLE_ROCM) && \ - !defined(KOKKOS_ENABLE_OPENMPTARGET) +#if !defined(KOKKOS_ENABLE_THREADS) && !defined(KOKKOS_ENABLE_CUDA) && \ + !defined(KOKKOS_ENABLE_OPENMP) && !defined(KOKKOS_ENABLE_HPX) && \ + !defined(KOKKOS_ENABLE_ROCM) && !defined(KOKKOS_ENABLE_OPENMPTARGET) && \ + !defined(KOKKOS_ENABLE_HIP) #define KOKKOS_INTERNAL_NOT_PARALLEL #endif @@ -131,37 +131,17 @@ #error "#include did not define CUDA_VERSION." #endif -#if (CUDA_VERSION < 7000) -// CUDA supports C++11 in device code starting with version 7.0. -// This includes auto type and device code internal lambdas. -#error "Cuda version 7.0 or greater required." -#endif - #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 300) // Compiling with CUDA compiler for device code. #error "Cuda device capability >= 3.0 is required." #endif #ifdef KOKKOS_ENABLE_CUDA_LAMBDA -#if (CUDA_VERSION < 7050) -// CUDA supports C++11 lambdas generated in host code to be given -// to the device starting with version 7.5. But the release candidate (7.5.6) -// still identifies as 7.0. -#error "Cuda version 7.5 or greater required for host-to-device Lambda support." -#endif - -#if (CUDA_VERSION < 8000) && defined(__NVCC__) -#define KOKKOS_LAMBDA [=] __device__ -#if defined(KOKKOS_INTERNAL_ENABLE_NON_CUDA_BACKEND) -#undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA -#endif -#else #define KOKKOS_LAMBDA [=] __host__ __device__ #if defined(KOKKOS_ENABLE_CXX17) || defined(KOKKOS_ENABLE_CXX20) #define KOKKOS_CLASS_LAMBDA [ =, *this ] __host__ __device__ #endif -#endif #if defined(__NVCC__) #define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER @@ -170,12 +150,6 @@ #undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA #endif // !defined(KOKKOS_ENABLE_CUDA_LAMBDA) -#if (9000 <= CUDA_VERSION) && (CUDA_VERSION < 10000) -// CUDA 9 introduced an incorrect warning, -// see https://github.com/kokkos/kokkos/issues/1470 -#define KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND -#endif - #if (10000 > CUDA_VERSION) #define KOKKOS_ENABLE_PRE_CUDA_10_DEPRECATION_API #endif @@ -194,6 +168,17 @@ #endif // #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) +#if defined(KOKKOS_ENABLE_HIP) + +#define KOKKOS_IMPL_HIP_CLANG_WORKAROUND + +#define HIP_ENABLE_PRINTF +#include +#include + +#define KOKKOS_LAMBDA [=] __host__ __device__ +#endif // #if defined(KOKKOS_ENABLE_HIP) + //---------------------------------------------------------------------------- // Mapping compiler built-ins to KOKKOS_COMPILER_*** macros @@ -267,6 +252,10 @@ #endif #endif +#if defined(_MSC_VER) && !defined(KOKKOS_COMPILER_INTEL) +#define KOKKOS_COMPILER_MSVC _MSC_VER +#endif + //#endif // #if !defined( __CUDA_ARCH__ ) //---------------------------------------------------------------------------- // Language info: C++, CUDA, OPENMP @@ -283,7 +272,24 @@ #else #define KOKKOS_INLINE_FUNCTION_DELETED __device__ __host__ inline #endif -#endif // #if defined( __CUDA_ARCH__ ) +#if (CUDA_VERSION < 10000) +#define KOKKOS_DEFAULTED_FUNCTION __host__ __device__ inline +#else +#define KOKKOS_DEFAULTED_FUNCTION inline +#endif +#endif + +#if defined(KOKKOS_ENABLE_HIP) + +#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ +#define KOKKOS_INLINE_FUNCTION __device__ __host__ inline +#define KOKKOS_DEFAULTED_FUNCTION __device__ __host__ inline +#define KOKKOS_INLINE_FUNCTION_DELETED __device__ __host__ inline +#define KOKKOS_FUNCTION __device__ __host__ +#if defined(KOKKOS_ENABLE_CXX17) || defined(KOKKOS_ENABLE_CXX20) +#define KOKKOS_CLASS_LAMBDA [ =, *this ] __host__ __device__ +#endif +#endif // #if defined( KOKKOS_ENABLE_HIP ) #if defined(KOKKOS_ENABLE_ROCM) && defined(__HCC__) @@ -291,6 +297,7 @@ #define KOKKOS_INLINE_FUNCTION __attribute__((amp, cpu)) inline #define KOKKOS_FUNCTION __attribute__((amp, cpu)) #define KOKKOS_LAMBDA [=] __attribute__((amp, cpu)) +#define KOKKOS_DEFAULTED_FUNCTION __attribute__((amp, cpu)) inline #endif #if defined(_OPENMP) @@ -355,6 +362,10 @@ #endif #endif +#if (1800 > KOKKOS_COMPILER_INTEL) +#define KOKKOS_IMPL_INTEL_WORKAROUND_NOEXCEPT_SPECIFICATION_VIRTUAL_FUNCTION +#endif + #if defined(__MIC__) // Compiling for Xeon Phi #endif @@ -470,6 +481,10 @@ #if !defined(KOKKOS_INLINE_FUNCTION_DELETED) #define KOKKOS_INLINE_FUNCTION_DELETED inline #endif + +#if !defined(KOKKOS_DEFAULTED_FUNCTION) +#define KOKKOS_DEFAULTED_FUNCTION inline +#endif //---------------------------------------------------------------------------- // Define empty macro for restrict if necessary: @@ -497,28 +512,30 @@ // There is zero or one default execution space specified. #if 1 < ((defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA) ? 1 : 0) + \ + (defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HIP) ? 1 : 0) + \ (defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM) ? 1 : 0) + \ (defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET) ? 1 : 0) + \ (defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP) ? 1 : 0) + \ (defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS) ? 1 : 0) + \ - (defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS) ? 1 : 0) + \ (defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX) ? 1 : 0) + \ (defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL) ? 1 : 0)) #error "More than one KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_* specified." #endif // If default is not specified then chose from enabled execution spaces. -// Priority: CUDA, OPENMP, THREADS, QTHREADS, HPX, SERIAL +// Priority: CUDA, HIP, ROCM, OPENMPTARGET, OPENMP, THREADS, HPX, SERIAL #if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA) +#elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HIP) #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM) #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET) #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP) #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS) -//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX) #elif defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL) #elif defined(KOKKOS_ENABLE_CUDA) #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA +#elif defined(KOKKOS_ENABLE_HIP) +#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HIP #elif defined(KOKKOS_ENABLE_ROCM) #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM #elif defined(KOKKOS_ENABLE_OPENMPTARGET) @@ -527,8 +544,6 @@ #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP #elif defined(KOKKOS_ENABLE_THREADS) #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS -//#elif defined( KOKKOS_ENABLE_QTHREADS ) -// #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS #elif defined(KOKKOS_ENABLE_HPX) #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HPX #else @@ -543,6 +558,10 @@ #elif defined(__HCC__) && defined(__HCC_ACCELERATOR__) && \ defined(KOKKOS_ENABLE_ROCM) #define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_ROCM_GPU +#elif defined(__HIPCC__) && \ + (defined(__HCC_ACCELERATOR__) || defined(__CUDA_ARCH__)) && \ + defined(KOKKOS_ENABLE_HIP) +#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HIP_GPU #else #define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST #endif @@ -557,27 +576,25 @@ #endif //---------------------------------------------------------------------------- -// If compiling with CUDA then must be using CUDA 8 or better -// and use relocateable device code to enable the task policy. -// nvcc relocatable device code option: --relocatable-device-code=true +// If compiling with CUDA, we must use relocateable device code +// to enable the task policy. -#if (defined(KOKKOS_ENABLE_CUDA)) -#if (8000 <= CUDA_VERSION) && \ - defined(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) +#if defined(KOKKOS_ENABLE_CUDA) +#if defined(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) #define KOKKOS_ENABLE_TASKDAG #endif #else +#ifndef KOKKOS_ENABLE_HIP #define KOKKOS_ENABLE_TASKDAG #endif +#endif #if defined(KOKKOS_ENABLE_CUDA) -#if (9000 <= CUDA_VERSION) #define KOKKOS_IMPL_CUDA_VERSION_9_WORKAROUND #if (__CUDA_ARCH__) #define KOKKOS_IMPL_CUDA_SYNCWARP_NEEDS_MASK #endif #endif -#endif #define KOKKOS_INVALID_INDEX (~std::size_t(0)) @@ -622,4 +639,17 @@ #define KOKKOS_IMPL_ENABLE_CXXABI #endif +// WORKAROUND for AMD aomp which apparently defines CUDA_ARCH when building for +// AMD GPUs with OpenMP Target ??? +#if defined(__CUDA_ARCH__) && !defined(__CUDACC__) && \ + !defined(KOKKOS_ENABLE_HIP) && !defined(KOKKOS_ENABLE_CUDA) +#undef __CUDA_ARCH__ +#endif + +#if defined(KOKKOS_COMPILER_MSVC) +#define KOKKOS_THREAD_LOCAL __declspec(thread) +#else +#define KOKKOS_THREAD_LOCAL __thread +#endif + #endif // #ifndef KOKKOS_MACROS_HPP diff --git a/core/src/Kokkos_MemoryPool.hpp b/core/src/Kokkos_MemoryPool.hpp index 5228f366120..da075447018 100644 --- a/core/src/Kokkos_MemoryPool.hpp +++ b/core/src/Kokkos_MemoryPool.hpp @@ -257,61 +257,14 @@ class MemoryPool { //-------------------------------------------------------------------------- -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - KOKKOS_INLINE_FUNCTION MemoryPool(MemoryPool &&rhs) - : m_tracker(std::move(rhs.m_tracker)), - m_sb_state_array(std::move(rhs.m_sb_state_array)), - m_sb_state_size(std::move(rhs.m_sb_state_size)), - m_sb_size_lg2(std::move(rhs.m_sb_size_lg2)), - m_max_block_size_lg2(std::move(rhs.m_max_block_size_lg2)), - m_min_block_size_lg2(std::move(rhs.m_min_block_size_lg2)), - m_sb_count(std::move(rhs.m_sb_count)), - m_hint_offset(std::move(rhs.m_hint_offset)), - m_data_offset(std::move(rhs.m_data_offset)) {} - KOKKOS_INLINE_FUNCTION MemoryPool(const MemoryPool &rhs) - : m_tracker(rhs.m_tracker), - m_sb_state_array(rhs.m_sb_state_array), - m_sb_state_size(rhs.m_sb_state_size), - m_sb_size_lg2(rhs.m_sb_size_lg2), - m_max_block_size_lg2(rhs.m_max_block_size_lg2), - m_min_block_size_lg2(rhs.m_min_block_size_lg2), - m_sb_count(rhs.m_sb_count), - m_hint_offset(rhs.m_hint_offset), - m_data_offset(rhs.m_data_offset) {} - KOKKOS_INLINE_FUNCTION MemoryPool &operator=(MemoryPool &&rhs) { - m_tracker = std::move(rhs.m_tracker); - m_sb_state_array = std::move(rhs.m_sb_state_array); - m_sb_state_size = std::move(rhs.m_sb_state_size); - m_sb_size_lg2 = std::move(rhs.m_sb_size_lg2); - m_max_block_size_lg2 = std::move(rhs.m_max_block_size_lg2); - m_min_block_size_lg2 = std::move(rhs.m_min_block_size_lg2); - m_sb_count = std::move(rhs.m_sb_count); - m_hint_offset = std::move(rhs.m_hint_offset); - m_data_offset = std::move(rhs.m_data_offset); - return *this; - } - KOKKOS_INLINE_FUNCTION MemoryPool &operator=(const MemoryPool &rhs) { - m_tracker = rhs.m_tracker; - m_sb_state_array = rhs.m_sb_state_array; - m_sb_state_size = rhs.m_sb_state_size; - m_sb_size_lg2 = rhs.m_sb_size_lg2; - m_max_block_size_lg2 = rhs.m_max_block_size_lg2; - m_min_block_size_lg2 = rhs.m_min_block_size_lg2; - m_sb_count = rhs.m_sb_count; - m_hint_offset = rhs.m_hint_offset; - m_data_offset = rhs.m_data_offset; - return *this; - } -#else - KOKKOS_INLINE_FUNCTION MemoryPool(MemoryPool &&) = default; - KOKKOS_INLINE_FUNCTION MemoryPool(const MemoryPool &) = default; - KOKKOS_INLINE_FUNCTION MemoryPool &operator=(MemoryPool &&) = default; - KOKKOS_INLINE_FUNCTION MemoryPool &operator=(const MemoryPool &) = default; -#endif + KOKKOS_DEFAULTED_FUNCTION MemoryPool(MemoryPool &&) = default; + KOKKOS_DEFAULTED_FUNCTION MemoryPool(const MemoryPool &) = default; + KOKKOS_DEFAULTED_FUNCTION MemoryPool &operator=(MemoryPool &&) = default; + KOKKOS_DEFAULTED_FUNCTION MemoryPool &operator=(const MemoryPool &) = default; KOKKOS_INLINE_FUNCTION MemoryPool() : m_tracker(), - m_sb_state_array(0), + m_sb_state_array(nullptr), m_sb_state_size(0), m_sb_size_lg2(0), m_max_block_size_lg2(0), @@ -339,7 +292,7 @@ class MemoryPool { const size_t min_total_alloc_size, size_t min_block_alloc_size = 0, size_t max_block_alloc_size = 0, size_t min_superblock_size = 0) : m_tracker(), - m_sb_state_array(0), + m_sb_state_array(nullptr), m_sb_state_size(0), m_sb_size_lg2(0), m_max_block_size_lg2(0), @@ -547,9 +500,9 @@ class MemoryPool { "allocation size"); } - if (0 == alloc_size) return (void *)0; + if (0 == alloc_size) return nullptr; - void *p = 0; + void *p = nullptr; const uint32_t block_size_lg2 = get_block_size_lg2(alloc_size); @@ -590,7 +543,7 @@ class MemoryPool { int32_t sb_id = -1; - volatile uint32_t *sb_state_array = 0; + volatile uint32_t *sb_state_array = nullptr; while (attempt_limit) { int32_t hint_sb_id = -1; @@ -784,7 +737,7 @@ class MemoryPool { */ KOKKOS_INLINE_FUNCTION void deallocate(void *p, size_t /* alloc_size */) const noexcept { - if (0 == p) return; + if (nullptr == p) return; // Determine which superblock and block const ptrdiff_t d = diff --git a/core/src/Kokkos_NumericTraits.hpp b/core/src/Kokkos_NumericTraits.hpp index 666bb332662..88040bcbaa6 100644 --- a/core/src/Kokkos_NumericTraits.hpp +++ b/core/src/Kokkos_NumericTraits.hpp @@ -160,8 +160,8 @@ struct reduction_identity { KOKKOS_FORCEINLINE_FUNCTION constexpr static long prod() { return static_cast(1); } - KOKKOS_FORCEINLINE_FUNCTION constexpr static long max() { return LLONG_MIN; } - KOKKOS_FORCEINLINE_FUNCTION constexpr static long min() { return LLONG_MAX; } + KOKKOS_FORCEINLINE_FUNCTION constexpr static long max() { return LONG_MIN; } + KOKKOS_FORCEINLINE_FUNCTION constexpr static long min() { return LONG_MAX; } KOKKOS_FORCEINLINE_FUNCTION constexpr static long bor() { return static_cast(0x0); } @@ -368,7 +368,8 @@ struct reduction_identity { KOKKOS_FORCEINLINE_FUNCTION constexpr static double min() { return DBL_MAX; } }; -#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) +#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) && \ + !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HIP_GPU) template <> struct reduction_identity { KOKKOS_FORCEINLINE_FUNCTION constexpr static long double sum() { diff --git a/core/src/Kokkos_OpenMP.hpp b/core/src/Kokkos_OpenMP.hpp index 3955c061562..d9b9077c6df 100644 --- a/core/src/Kokkos_OpenMP.hpp +++ b/core/src/Kokkos_OpenMP.hpp @@ -94,9 +94,6 @@ class OpenMP { using size_type = memory_space::size_type; using scratch_memory_space = ScratchMemorySpace; - /// \brief Get a handle to the default execution space instance - inline OpenMP() noexcept; - /// \brief Print configuration information to the given output stream. static void print_configuration(std::ostream&, const bool verbose = false); @@ -225,8 +222,17 @@ class OpenMP { #endif static constexpr const char* name() noexcept { return "OpenMP"; } + uint32_t impl_instance_id() const noexcept { return 0; } }; +namespace Profiling { +namespace Experimental { +template <> +struct DeviceTypeTraits { + static constexpr DeviceType id = DeviceType::OpenMP; +}; +} // namespace Experimental +} // namespace Profiling } // namespace Kokkos /*--------------------------------------------------------------------------*/ diff --git a/core/src/Kokkos_OpenMPTarget.hpp b/core/src/Kokkos_OpenMPTarget.hpp index 9706751f9cf..d113f244229 100644 --- a/core/src/Kokkos_OpenMPTarget.hpp +++ b/core/src/Kokkos_OpenMPTarget.hpp @@ -65,6 +65,10 @@ namespace Kokkos { namespace Experimental { +namespace Impl { +class OpenMPTargetInternal; +} + /// \class OpenMPTarget /// \brief Kokkos device for multicore processors in the host memory space. class OpenMPTarget { @@ -84,76 +88,47 @@ class OpenMPTarget { typedef ScratchMemorySpace scratch_memory_space; - //@} - //------------------------------------ - //! \name Functions that all Kokkos execution spaces must implement. - //@{ - inline static bool in_parallel() { return omp_in_parallel(); } - /** \brief Set the device in a "sleep" state. A noop for OpenMPTarget. */ - static bool sleep(); - - /** \brief Wake the device from the 'sleep' state. A noop for OpenMPTarget. */ - static bool wake(); - - /** \brief Wait until all dispatched functors complete. A noop for - * OpenMPTarget. */ - static void fence() {} - - /// \brief Print configuration information to the given output stream. - static void print_configuration(std::ostream&, const bool detail = false); - - /// \brief Free any resources being consumed by the device. - static void finalize(); - - /** \brief Initialize the device. - * - * 1) If the hardware locality library is enabled and OpenMPTarget has not - * already bound threads then bind OpenMPTarget threads to maximize - * core utilization and group for memory hierarchy locality. - * - * 2) Allocate a HostThread for each OpenMPTarget thread to hold its - * topology and fan in/out data. - */ - static void initialize(unsigned thread_count = 0, unsigned use_numa_count = 0, - unsigned use_cores_per_numa = 0); - - static int is_initialized(); + static void fence(); /** \brief Return the maximum amount of concurrency. */ static int concurrency(); - //@} - //------------------------------------ - /** \brief This execution space has a topological thread pool which can be - * queried. - * - * All threads within a pool have a common memory space for which they are - * cache coherent. depth = 0 gives the number of threads in the whole pool. - * depth = 1 gives the number of threads in a NUMA region, typically - * sharing L3 cache. depth = 2 gives the number of threads at the finest - * granularity, typically sharing L1 cache. - */ - inline static int thread_pool_size(int depth = 0); - - /** \brief The rank of the executing thread in this thread pool */ - KOKKOS_INLINE_FUNCTION static int thread_pool_rank(); + //! Print configuration information to the given output stream. + void print_configuration(std::ostream&, const bool detail = false); - //------------------------------------ + static const char* name(); - inline static unsigned max_hardware_threads() { return thread_pool_size(0); } + //! Free any resources being consumed by the device. + void impl_finalize(); - KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { - return thread_pool_rank(); + //! Has been initialized + static int impl_is_initialized(); + + //! Initialize, telling the CUDA run-time library which device to use. + void impl_initialize(); + + inline Impl::OpenMPTargetInternal* impl_internal_space_instance() const { + return m_space_instance; } - static const char* name(); + OpenMPTarget(); + uint32_t impl_instance_id() const noexcept { return 0; } private: - static bool m_is_initialized; + Impl::OpenMPTargetInternal* m_space_instance; +}; +} // namespace Experimental + +namespace Profiling { +namespace Experimental { +template <> +struct DeviceTypeTraits { + static constexpr DeviceType id = DeviceType::OpenMPTarget; }; } // namespace Experimental +} // namespace Profiling } // namespace Kokkos /*--------------------------------------------------------------------------*/ @@ -179,6 +154,7 @@ struct VerifyExecutionCanAccessMemorySpace< #include #include +#include #include /*--------------------------------------------------------------------------*/ diff --git a/core/src/Kokkos_OpenMPTargetSpace.hpp b/core/src/Kokkos_OpenMPTargetSpace.hpp index 96c46754b45..9d24a342e7b 100644 --- a/core/src/Kokkos_OpenMPTargetSpace.hpp +++ b/core/src/Kokkos_OpenMPTargetSpace.hpp @@ -69,10 +69,10 @@ namespace Impl { /// This function initializes the locks to zero (unset). // void init_lock_array_host_space(); -/// \brief Aquire a lock for the address +/// \brief Acquire a lock for the address /// -/// This function tries to aquire the lock for the hash value derived -/// from the provided ptr. If the lock is successfully aquired the +/// This function tries to acquire the lock for the hash value derived +/// from the provided ptr. If the lock is successfully acquired the /// function returns true. Otherwise it returns false. // bool lock_address_host_space(void* ptr); @@ -80,7 +80,7 @@ namespace Impl { /// /// This function releases the lock for the hash value derived /// from the provided ptr. This function should only be called -/// after previously successfully aquiring a lock with +/// after previously successfully acquiring a lock with /// lock_address. // void unlock_address_host_space(void* ptr); @@ -128,6 +128,8 @@ class OpenMPTargetSpace { /**\brief Deallocate untracked memory in the space */ void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const; + static constexpr const char* name() { return "OpenMPTargetSpace"; } + private: friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace, void>; @@ -174,7 +176,13 @@ class SharedAllocationRecord KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate( const Kokkos::Experimental::OpenMPTargetSpace& arg_space, - const std::string& arg_label, const size_t arg_alloc_size); + const std::string& arg_label, const size_t arg_alloc_size) { +#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) + return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); +#else + return nullptr; +#endif + } /**\brief Allocate tracked memory in the space */ static void* allocate_tracked( diff --git a/core/src/Kokkos_Pair.hpp b/core/src/Kokkos_Pair.hpp index ee9797b4b34..23bb755e334 100644 --- a/core/src/Kokkos_Pair.hpp +++ b/core/src/Kokkos_Pair.hpp @@ -78,14 +78,7 @@ struct pair { /// This calls the default constructors of T1 and T2. It won't /// compile if those default constructors are not defined and /// public. - KOKKOS_FORCEINLINE_FUNCTION constexpr -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - pair() - : first(), second() { - } -#else - pair() = default; -#endif + KOKKOS_DEFAULTED_FUNCTION constexpr pair() = default; /// \brief Constructor that takes both elements of the pair. /// @@ -439,14 +432,7 @@ struct pair { first_type first; enum { second = 0 }; - KOKKOS_FORCEINLINE_FUNCTION constexpr -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - pair() - : first() { - } -#else - pair() = default; -#endif + KOKKOS_DEFAULTED_FUNCTION constexpr pair() = default; KOKKOS_FORCEINLINE_FUNCTION constexpr pair(const first_type& f) : first(f) {} diff --git a/core/src/Kokkos_Parallel.hpp b/core/src/Kokkos_Parallel.hpp index 7277ce287a1..775ab9203c5 100644 --- a/core/src/Kokkos_Parallel.hpp +++ b/core/src/Kokkos_Parallel.hpp @@ -114,7 +114,7 @@ struct FunctorPolicyExecutionSpace< Functor, Policy, typename enable_if_type::type, EnablePolicy> { - typedef typename Functor::device_type execution_space; + typedef typename Functor::device_type::execution_space execution_space; }; template @@ -158,16 +158,21 @@ template inline void parallel_for( const ExecPolicy& policy, const FunctorType& functor, const std::string& str = "", - typename Impl::enable_if< - Kokkos::Impl::is_execution_policy::value>::type* = 0) { + typename std::enable_if< + Kokkos::Impl::is_execution_policy::value>::type* = + nullptr) { #if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Impl::ParallelConstructName name(str); - Kokkos::Profiling::beginParallelFor(name.get(), 0, &kpID); + Kokkos::Profiling::beginParallelFor( + name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()), + &kpID); } +#else + (void)str; #endif Kokkos::Impl::shared_allocation_tracking_disable(); @@ -194,8 +199,12 @@ inline void parallel_for(const size_t work_count, const FunctorType& functor, uint64_t kpID = 0; if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Impl::ParallelConstructName name(str); - Kokkos::Profiling::beginParallelFor(name.get(), 0, &kpID); + Kokkos::Profiling::beginParallelFor( + name.get(), + Kokkos::Profiling::Experimental::device_id(policy().space()), &kpID); } +#else + (void)str; #endif Kokkos::Impl::shared_allocation_tracking_disable(); @@ -403,16 +412,21 @@ template inline void parallel_scan( const ExecutionPolicy& policy, const FunctorType& functor, const std::string& str = "", - typename Impl::enable_if< - Kokkos::Impl::is_execution_policy::value>::type* = 0) { + typename std::enable_if< + Kokkos::Impl::is_execution_policy::value>::type* = + nullptr) { #if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Impl::ParallelConstructName name(str); - Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID); + Kokkos::Profiling::beginParallelScan( + name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()), + &kpID); } +#else + (void)str; #endif Kokkos::Impl::shared_allocation_tracking_disable(); @@ -440,8 +454,12 @@ inline void parallel_scan(const size_t work_count, const FunctorType& functor, uint64_t kpID = 0; if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Impl::ParallelConstructName name(str); - Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID); + Kokkos::Profiling::beginParallelScan( + name.get(), + Kokkos::Profiling::Experimental::device_id(policy().space()), &kpID); } +#else + (void)str; #endif Kokkos::Impl::shared_allocation_tracking_disable(); @@ -479,16 +497,21 @@ template inline void parallel_scan( const ExecutionPolicy& policy, const FunctorType& functor, ReturnType& return_value, const std::string& str = "", - typename Impl::enable_if< - Kokkos::Impl::is_execution_policy::value>::type* = 0) { + typename std::enable_if< + Kokkos::Impl::is_execution_policy::value>::type* = + nullptr) { #if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Impl::ParallelConstructName name(str); - Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID); + Kokkos::Profiling::beginParallelScan( + name.get(), Kokkos::Profiling::Experimental::device_id(policy.space()), + &kpID); } +#else + (void)str; #endif Kokkos::Impl::shared_allocation_tracking_disable(); @@ -503,7 +526,7 @@ inline void parallel_scan( Kokkos::Profiling::endParallelScan(kpID); } #endif - Kokkos::fence(); + policy.space().fence(); } template @@ -519,8 +542,12 @@ inline void parallel_scan(const size_t work_count, const FunctorType& functor, uint64_t kpID = 0; if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Impl::ParallelConstructName name(str); - Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID); + Kokkos::Profiling::beginParallelScan( + name.get(), + Kokkos::Profiling::Experimental::device_id(policy().space()), &kpID); } +#else + (void)str; #endif Kokkos::Impl::shared_allocation_tracking_disable(); @@ -535,7 +562,7 @@ inline void parallel_scan(const size_t work_count, const FunctorType& functor, Kokkos::Profiling::endParallelScan(kpID); } #endif - Kokkos::fence(); + execution_space().fence(); } template @@ -564,7 +591,10 @@ inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy, namespace Kokkos { namespace Impl { -template +template ::value, + bool HasShmemSize = has_member_shmem_size::value> struct FunctorTeamShmemSize { KOKKOS_INLINE_FUNCTION static size_t value(const FunctorType&, int) { return 0; @@ -572,22 +602,27 @@ struct FunctorTeamShmemSize { }; template -struct FunctorTeamShmemSize< - FunctorType, - typename Impl::enable_if<0 < sizeof(&FunctorType::team_shmem_size)>::type> { +struct FunctorTeamShmemSize { static inline size_t value(const FunctorType& f, int team_size) { return f.team_shmem_size(team_size); } }; template -struct FunctorTeamShmemSize< - FunctorType, - typename Impl::enable_if<0 < sizeof(&FunctorType::shmem_size)>::type> { +struct FunctorTeamShmemSize { static inline size_t value(const FunctorType& f, int team_size) { return f.shmem_size(team_size); } }; +template +struct FunctorTeamShmemSize { + static inline size_t value(const FunctorType& /*f*/, int /*team_size*/) { + Kokkos::abort( + "Functor with both team_shmem_size and shmem_size defined is " + "not allowed"); + return 0; + } +}; } // namespace Impl } // namespace Kokkos diff --git a/core/src/Kokkos_Parallel_Reduce.hpp b/core/src/Kokkos_Parallel_Reduce.hpp index 1fa23f714f6..4ef2dbdf0d7 100644 --- a/core/src/Kokkos_Parallel_Reduce.hpp +++ b/core/src/Kokkos_Parallel_Reduce.hpp @@ -543,7 +543,6 @@ struct MaxLoc { KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.val = reduction_identity::max(); - ; val.loc = reduction_identity::min(); } @@ -622,7 +621,6 @@ struct MinMax { KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.max_val = reduction_identity::max(); - ; val.min_val = reduction_identity::min(); } @@ -711,7 +709,6 @@ struct MinMaxLoc { KOKKOS_INLINE_FUNCTION void init(value_type& val) const { val.max_val = reduction_identity::max(); - ; val.min_val = reduction_identity::min(); val.max_loc = reduction_identity::min(); val.min_loc = reduction_identity::min(); @@ -772,7 +769,7 @@ struct ParallelReduceReturnValue< template struct ParallelReduceReturnValue< - typename std::enable_if<(is_array::value || + typename std::enable_if<(std::is_array::value || std::is_pointer::value)>::type, ReturnType, FunctorType> { typedef Kokkos::View::type, @@ -788,7 +785,7 @@ struct ParallelReduceReturnValue< #ifdef KOKKOS_ENABLE_DEPRECATED_CODE return return_type(return_val, functor.value_count); #else - if (is_array::value) + if (std::is_array::value) return return_type(return_val); else return return_type(return_val, functor.value_count); @@ -865,6 +862,8 @@ struct ParallelReduceAdaptor { name(label); Kokkos::Profiling::beginParallelReduce(name.get(), 0, &kpID); } +#else + (void)label; #endif Kokkos::Impl::shared_allocation_tracking_disable(); @@ -915,22 +914,25 @@ struct ReducerHasTestReferenceFunction { static std::false_type test_func(...); enum { - value = std::is_same(0))>::value + value = std::is_same(nullptr))>::value }; }; -template ::value> +template ::value> struct ParallelReduceFence { - static void fence(const T&) { Kokkos::fence(); } + static void fence(const ExecutionSpace& execution_space, const T&) { + execution_space.fence(); + } }; -template -struct ParallelReduceFence, false> { - static void fence(const View){}; +template +struct ParallelReduceFence, false> { + static void fence(const ExecutionSpace&, const View){}; }; -template -struct ParallelReduceFence { - static void fence(const T& reducer) { - if (reducer.references_scalar()) Kokkos::fence(); +template +struct ParallelReduceFence { + static void fence(const ExecutionSpace& execution_space, const T& reducer) { + if (reducer.references_scalar()) execution_space.fence(); } }; } // namespace Impl @@ -976,22 +978,26 @@ template inline void parallel_reduce( const std::string& label, const PolicyType& policy, const FunctorType& functor, ReturnType& return_value, - typename Impl::enable_if< - Kokkos::Impl::is_execution_policy::value>::type* = 0) { + typename std::enable_if< + Kokkos::Impl::is_execution_policy::value>::type* = + nullptr) { Impl::ParallelReduceAdaptor::execute( label, policy, functor, return_value); - Impl::ParallelReduceFence::fence(return_value); + Impl::ParallelReduceFence::fence(policy.space(), return_value); } template inline void parallel_reduce( const PolicyType& policy, const FunctorType& functor, ReturnType& return_value, - typename Impl::enable_if< - Kokkos::Impl::is_execution_policy::value>::type* = 0) { + typename std::enable_if< + Kokkos::Impl::is_execution_policy::value>::type* = + nullptr) { Impl::ParallelReduceAdaptor::execute( "", policy, functor, return_value); - Impl::ParallelReduceFence::fence(return_value); + Impl::ParallelReduceFence::fence(policy.space(), return_value); } template @@ -1001,7 +1007,8 @@ inline void parallel_reduce(const size_t& policy, const FunctorType& functor, void, size_t, FunctorType>::policy_type policy_type; Impl::ParallelReduceAdaptor::execute( "", policy_type(0, policy), functor, return_value); - Impl::ParallelReduceFence::fence(return_value); + Impl::ParallelReduceFence:: + fence(typename policy_type::execution_space(), return_value); } template @@ -1012,7 +1019,8 @@ inline void parallel_reduce(const std::string& label, const size_t& policy, void, size_t, FunctorType>::policy_type policy_type; Impl::ParallelReduceAdaptor::execute( label, policy_type(0, policy), functor, return_value); - Impl::ParallelReduceFence::fence(return_value); + Impl::ParallelReduceFence:: + fence(typename policy_type::execution_space(), return_value); } // ReturnValue as View or Reducer: take by copy to allow for inline construction @@ -1021,24 +1029,28 @@ template inline void parallel_reduce( const std::string& label, const PolicyType& policy, const FunctorType& functor, const ReturnType& return_value, - typename Impl::enable_if< - Kokkos::Impl::is_execution_policy::value>::type* = 0) { + typename std::enable_if< + Kokkos::Impl::is_execution_policy::value>::type* = + nullptr) { ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute( label, policy, functor, return_value_impl); - Impl::ParallelReduceFence::fence(return_value); + Impl::ParallelReduceFence::fence(policy.space(), return_value); } template inline void parallel_reduce( const PolicyType& policy, const FunctorType& functor, const ReturnType& return_value, - typename Impl::enable_if< - Kokkos::Impl::is_execution_policy::value>::type* = 0) { + typename std::enable_if< + Kokkos::Impl::is_execution_policy::value>::type* = + nullptr) { ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute( "", policy, functor, return_value_impl); - Impl::ParallelReduceFence::fence(return_value); + Impl::ParallelReduceFence::fence(policy.space(), return_value); } template @@ -1049,7 +1061,8 @@ inline void parallel_reduce(const size_t& policy, const FunctorType& functor, ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute( "", policy_type(0, policy), functor, return_value_impl); - Impl::ParallelReduceFence::fence(return_value); + Impl::ParallelReduceFence:: + fence(typename policy_type::execution_space(), return_value); } template @@ -1061,7 +1074,8 @@ inline void parallel_reduce(const std::string& label, const size_t& policy, ReturnType return_value_impl = return_value; Impl::ParallelReduceAdaptor::execute( label, policy_type(0, policy), functor, return_value_impl); - Impl::ParallelReduceFence::fence(return_value); + Impl::ParallelReduceFence:: + fence(typename policy_type::execution_space(), return_value); } // No Return Argument @@ -1070,8 +1084,9 @@ template inline void parallel_reduce( const std::string& label, const PolicyType& policy, const FunctorType& functor, - typename Impl::enable_if< - Kokkos::Impl::is_execution_policy::value>::type* = 0) { + typename std::enable_if< + Kokkos::Impl::is_execution_policy::value>::type* = + nullptr) { typedef Kokkos::Impl::FunctorValueTraits ValueTraits; typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0), typename ValueTraits::value_type, @@ -1094,8 +1109,9 @@ inline void parallel_reduce( template inline void parallel_reduce( const PolicyType& policy, const FunctorType& functor, - typename Impl::enable_if< - Kokkos::Impl::is_execution_policy::value>::type* = 0) { + typename std::enable_if< + Kokkos::Impl::is_execution_policy::value>::type* = + nullptr) { typedef Kokkos::Impl::FunctorValueTraits ValueTraits; typedef typename Kokkos::Impl::if_c< (ValueTraits::StaticValueSize != 0), typename ValueTraits::value_type, @@ -1169,7 +1185,7 @@ inline void parallel_reduce(const std::string& label, const size_t& policy, } // namespace Kokkos #ifdef KOKKOS_ENABLE_DEPRECATED_CODE -// backwards compatiblity for Kokkos::Experimental reducers +// backwards compatibility for Kokkos::Experimental reducers namespace Kokkos { namespace Experimental { using Kokkos::BAnd; diff --git a/core/src/Kokkos_Profiling_ProfileSection.hpp b/core/src/Kokkos_Profiling_ProfileSection.hpp index 3c667f44320..3999ccb9663 100644 --- a/core/src/Kokkos_Profiling_ProfileSection.hpp +++ b/core/src/Kokkos_Profiling_ProfileSection.hpp @@ -1,46 +1,46 @@ /* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 + //@HEADER + // ************************************************************************ + // + // Kokkos v. 3.0 // Copyright (2020) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ + // + // Under the terms of Contract DE-NA0003525 with NTESS, + // the U.S. Government retains certain rights in this software. + // + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + // + // 1. Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimer. + // + // 2. Redistributions in binary form must reproduce the above copyright + // notice, this list of conditions and the following disclaimer in the + // documentation and/or other materials provided with the distribution. + // + // 3. Neither the name of the Corporation nor the names of the + // contributors may be used to endorse or promote products derived from + // this software without specific prior written permission. + // + // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY + // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE + // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // + // Questions? Contact Christian R. Trott (crtrott@sandia.gov) + // + // ************************************************************************ + //@HEADER + */ #ifndef KOKKOSP_PROFILE_SECTION_HPP #define KOKKOSP_PROFILE_SECTION_HPP diff --git a/core/src/Kokkos_Qthreads.hpp b/core/src/Kokkos_Qthreads.hpp deleted file mode 100644 index e10bd48593a..00000000000 --- a/core/src/Kokkos_Qthreads.hpp +++ /dev/null @@ -1,196 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_QTHREADS_HPP -#define KOKKOS_QTHREADS_HPP - -#include -#if defined(KOKKOS_ENABLE_QTHREADS) - -#include - -// Defines to enable experimental Qthreads functionality. -#define QTHREAD_LOCAL_PRIORITY -#define CLONED_TASKS - -#include - -#include -#include - -#include -#include -#include -//#include -//#include -//#include // Uncomment when Tasking working. -#include -#include -#include - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { - -namespace Impl { - -class QthreadsExec; - -} // namespace Impl - -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { - -/** \brief Execution space supported by Qthreads */ -class Qthreads { - public: - //! \name Type declarations that all Kokkos devices must provide. - //@{ - - //! Tag this class as an execution space - typedef Qthreads execution_space; - typedef Kokkos::HostSpace memory_space; - //! This execution space preferred device_type - typedef Kokkos::Device device_type; - - typedef Kokkos::LayoutRight array_layout; - typedef memory_space::size_type size_type; - - typedef ScratchMemorySpace scratch_memory_space; - - //@} - /*------------------------------------------------------------------------*/ - - /** \brief Initialization will construct one or more instances */ - static Qthreads& instance(int = 0); - - /** \brief Set the execution space to a "sleep" state. - * - * This function sets the "sleep" state in which it is not ready for work. - * This may consume less resources than in an "ready" state, - * but it may also take time to transition to the "ready" state. - * - * \return True if enters or is in the "sleep" state. - * False if functions are currently executing. - */ - bool sleep(); - - /** \brief Wake from the sleep state. - * - * \return True if enters or is in the "ready" state. - * False if functions are currently executing. - */ - static bool wake(); - - /** \brief Wait until all dispatched functions to complete. - * - * The parallel_for or parallel_reduce dispatch of a functor may - * return asynchronously, before the functor completes. This - * method does not return until all dispatched functors on this - * device have completed. - */ - static void fence(); - - /*------------------------------------------------------------------------*/ - - static int in_parallel(); - - static int is_initialized(); - - /** \brief Return maximum amount of concurrency */ - static int concurrency(); - - static void initialize(int thread_count); - static void finalize(); - - /** \brief Print configuration information to the given output stream. */ - static void print_configuration(std::ostream&, const bool detail = false); - - int shepherd_size() const; - int shepherd_worker_size() const; - - static const char* name(); -}; - -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { - -namespace Impl { - -template <> -struct MemorySpaceAccess { - enum { assignable = false }; - enum { accessible = true }; - enum { deepcopy = false }; -}; - -template <> -struct VerifyExecutionCanAccessMemorySpace< - Kokkos::Qthreads::memory_space, Kokkos::Qthreads::scratch_memory_space> { - enum { value = true }; - inline static void verify(void) {} - inline static void verify(const void*) {} -}; - -} // namespace Impl - -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ - -#include -#include -//#include // Uncomment when Tasking -// working. #include // Uncomment when -// Tasking working. - -#endif // #define KOKKOS_ENABLE_QTHREADS -#endif // #define KOKKOS_QTHREADS_HPP diff --git a/core/src/Kokkos_ROCmSpace.hpp b/core/src/Kokkos_ROCmSpace.hpp index 36017cd40f1..56a1a93b9d0 100644 --- a/core/src/Kokkos_ROCmSpace.hpp +++ b/core/src/Kokkos_ROCmSpace.hpp @@ -130,7 +130,7 @@ int* atomic_lock_array_rocm_space_ptr(bool deallocate = false); /// global memory. /// /// Team and Thread private scratch allocations in -/// global memory are aquired via locks. +/// global memory are acquired via locks. /// This function retrieves the lock array pointer. /// If the array is not yet allocated it will do so. int* scratch_lock_array_rocm_space_ptr(bool deallocate = false); @@ -481,7 +481,7 @@ struct VerifyExecutionCanAccessMemorySpace< /** Running in ROCmSpace attempting to access an unknown space: error */ template struct VerifyExecutionCanAccessMemorySpace< - typename enable_if< + typename std::enable_if< !is_same::value, Kokkos::Experimental::ROCmSpace>::type, OtherSpace> { diff --git a/core/src/Kokkos_ScratchSpace.hpp b/core/src/Kokkos_ScratchSpace.hpp index c2337f08a11..708e0218b7b 100644 --- a/core/src/Kokkos_ScratchSpace.hpp +++ b/core/src/Kokkos_ScratchSpace.hpp @@ -92,6 +92,8 @@ class ScratchMemorySpace { typedef typename ExecSpace::array_layout array_layout; typedef typename ExecSpace::size_type size_type; + static constexpr const char* name() { return "ScratchMemorySpace"; } + template KOKKOS_INLINE_FUNCTION static IntType align(const IntType& size) { return (size + MASK) & ~MASK; @@ -108,13 +110,13 @@ class ScratchMemorySpace { #ifdef KOKKOS_DEBUG // mfh 23 Jun 2015: printf call consumes 25 registers // in a CUDA build, so only print in debug mode. The - // function still returns NULL if not enough memory. + // function still returns nullptr if not enough memory. printf( "ScratchMemorySpace<...>::get_shmem: Failed to allocate " "%ld byte(s); remaining capacity is %ld byte(s)\n", long(size), long(m_end_L0 - m_iter_L0)); #endif // KOKKOS_DEBUG - tmp = 0; + tmp = nullptr; } return tmp; } else { @@ -124,13 +126,13 @@ class ScratchMemorySpace { #ifdef KOKKOS_DEBUG // mfh 23 Jun 2015: printf call consumes 25 registers // in a CUDA build, so only print in debug mode. The - // function still returns NULL if not enough memory. + // function still returns nullptr if not enough memory. printf( "ScratchMemorySpace<...>::get_shmem: Failed to allocate " "%ld byte(s); remaining capacity is %ld byte(s)\n", long(size), long(m_end_L1 - m_iter_L1)); #endif // KOKKOS_DEBUG - tmp = 0; + tmp = nullptr; } return tmp; } @@ -151,13 +153,13 @@ class ScratchMemorySpace { #ifdef KOKKOS_DEBUG // mfh 23 Jun 2015: printf call consumes 25 registers // in a CUDA build, so only print in debug mode. The - // function still returns NULL if not enough memory. + // function still returns nullptr if not enough memory. printf( "ScratchMemorySpace<...>::get_shmem: Failed to allocate " "%ld byte(s); remaining capacity is %ld byte(s)\n", long(size), long(m_end_L0 - m_iter_L0)); #endif // KOKKOS_DEBUG - tmp = 0; + tmp = nullptr; } return tmp; } else { @@ -171,13 +173,13 @@ class ScratchMemorySpace { #ifdef KOKKOS_DEBUG // mfh 23 Jun 2015: printf call consumes 25 registers // in a CUDA build, so only print in debug mode. The - // function still returns NULL if not enough memory. + // function still returns nullptr if not enough memory. printf( "ScratchMemorySpace<...>::get_shmem: Failed to allocate " "%ld byte(s); remaining capacity is %ld byte(s)\n", long(size), long(m_end_L1 - m_iter_L1)); #endif // KOKKOS_DEBUG - tmp = 0; + tmp = nullptr; } return tmp; } @@ -186,7 +188,7 @@ class ScratchMemorySpace { template KOKKOS_INLINE_FUNCTION ScratchMemorySpace(void* ptr_L0, const IntType& size_L0, - void* ptr_L1 = NULL, + void* ptr_L1 = nullptr, const IntType& size_L1 = 0) : m_iter_L0((char*)ptr_L0), m_end_L0(m_iter_L0 + size_L0), diff --git a/core/src/Kokkos_Serial.hpp b/core/src/Kokkos_Serial.hpp index e30598be212..1f97998ea50 100644 --- a/core/src/Kokkos_Serial.hpp +++ b/core/src/Kokkos_Serial.hpp @@ -181,11 +181,20 @@ class Serial { return impl_thread_pool_size(0); } #endif + uint32_t impl_instance_id() const noexcept { return 0; } static const char* name(); //-------------------------------------------------------------------------- }; +namespace Profiling { +namespace Experimental { +template <> +struct DeviceTypeTraits { + static constexpr DeviceType id = DeviceType::Serial; +}; +} // namespace Experimental +} // namespace Profiling } // namespace Kokkos /*--------------------------------------------------------------------------*/ @@ -257,6 +266,11 @@ class TeamPolicyInternal //! Execution space of this execution policy: typedef Kokkos::Serial execution_space; + const typename traits::execution_space& space() const { + static typename traits::execution_space m_space; + return m_space; + } + TeamPolicyInternal& operator=(const TeamPolicyInternal& p) { m_league_size = p.m_league_size; m_team_scratch_size[0] = p.m_team_scratch_size[0]; @@ -641,7 +655,7 @@ class ParallelReduce, ReducerType, const HostViewType& arg_result_view, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void*>::type = NULL) + void*>::type = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), @@ -912,7 +926,7 @@ class ParallelReduce, ReducerType, const HostViewType& arg_result_view, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void*>::type = NULL) + void*>::type = nullptr) : m_functor(arg_functor), m_mdr_policy(arg_policy), m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), @@ -1087,7 +1101,7 @@ class ParallelReduce, const ViewType& arg_result, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void*>::type = NULL) + void*>::type = nullptr) : m_functor(arg_functor), m_league(arg_policy.league_size()), m_reducer(InvalidType()), diff --git a/core/src/Kokkos_TaskScheduler.hpp b/core/src/Kokkos_TaskScheduler.hpp index 4a78d6aca1c..6b9608d6290 100644 --- a/core/src/Kokkos_TaskScheduler.hpp +++ b/core/src/Kokkos_TaskScheduler.hpp @@ -143,7 +143,7 @@ class BasicTaskScheduler : public Impl::TaskSchedulerBase { Kokkos::BasicFuture _spawn_impl(DepTaskType* arg_predecessor_task, TaskPriority arg_priority, typename task_base::function_type arg_function, - typename task_base::destroy_type arg_destroy, + typename task_base::destroy_type /*arg_destroy*/, FunctorType&& arg_functor) { using functor_future_type = future_type_for_functor::type>; @@ -203,7 +203,7 @@ class BasicTaskScheduler : public Impl::TaskSchedulerBase { public: KOKKOS_INLINE_FUNCTION - BasicTaskScheduler() : m_track(), m_queue(0) {} + BasicTaskScheduler() : m_track(), m_queue(nullptr) {} KOKKOS_INLINE_FUNCTION BasicTaskScheduler(BasicTaskScheduler&& rhs) noexcept @@ -231,7 +231,7 @@ class BasicTaskScheduler : public Impl::TaskSchedulerBase { } explicit BasicTaskScheduler(memory_pool const& arg_memory_pool) noexcept - : m_track(), m_queue(0) { + : m_track(), m_queue(nullptr) { typedef Kokkos::Impl::SharedAllocationRecord record_type; @@ -349,7 +349,7 @@ class BasicTaskScheduler : public Impl::TaskSchedulerBase { task->m_priority = static_cast(arg_priority); - task->add_dependence((task_base*)0); + task->add_dependence(nullptr); // Postcondition: task is in Executing-Respawn state } @@ -380,8 +380,8 @@ class BasicTaskScheduler : public Impl::TaskSchedulerBase { } } - if (q != 0) { // this should probably handle the queue == 0 case, but - // this is deprecated code anyway + if (q != nullptr) { // this should probably handle the queue == 0 case, + // but this is deprecated code anyway size_t const alloc_size = q->when_all_allocation_size(narg); @@ -459,7 +459,7 @@ class BasicTaskScheduler : public Impl::TaskSchedulerBase { for (int i = 0; i < narg; ++i) { const input_type arg_f = func(i); - if (0 != arg_f.m_task) { + if (nullptr != arg_f.m_task) { // Not scheduled, so task scheduler is not yet set // if ( m_queue != static_cast< BasicTaskScheduler const * >( // arg_f.m_task->m_scheduler )->m_queue ) { diff --git a/core/src/Kokkos_Threads.hpp b/core/src/Kokkos_Threads.hpp index d44042b062b..9dd644df2ea 100644 --- a/core/src/Kokkos_Threads.hpp +++ b/core/src/Kokkos_Threads.hpp @@ -56,6 +56,7 @@ #include #include #include +#include #include /*--------------------------------------------------------------------------*/ @@ -200,11 +201,21 @@ class Threads { } #endif + uint32_t impl_instance_id() const noexcept { return 0; } + static const char* name(); //@} //---------------------------------------- }; +namespace Profiling { +namespace Experimental { +template <> +struct DeviceTypeTraits { + static constexpr DeviceType id = DeviceType::Threads; +}; +} // namespace Experimental +} // namespace Profiling } // namespace Kokkos /*--------------------------------------------------------------------------*/ diff --git a/core/src/Kokkos_Timer.hpp b/core/src/Kokkos_Timer.hpp index 117f097f66d..1dab73b44cb 100644 --- a/core/src/Kokkos_Timer.hpp +++ b/core/src/Kokkos_Timer.hpp @@ -60,7 +60,7 @@ class Timer { public: inline void reset() { m_old = std::chrono::high_resolution_clock::now(); } - inline ~Timer() {} + inline ~Timer() = default; inline Timer() { reset(); } diff --git a/core/src/Kokkos_Vectorization.hpp b/core/src/Kokkos_Vectorization.hpp index 2604538b1c1..cd24734100e 100644 --- a/core/src/Kokkos_Vectorization.hpp +++ b/core/src/Kokkos_Vectorization.hpp @@ -49,6 +49,8 @@ #if defined(KOKKOS_ENABLE_CUDA) #include +#elif defined(KOKKOS_ENABLE_HIP) +#include #endif #endif diff --git a/core/src/Kokkos_View.hpp b/core/src/Kokkos_View.hpp index 6610bb842bd..3d68d780a2e 100644 --- a/core/src/Kokkos_View.hpp +++ b/core/src/Kokkos_View.hpp @@ -450,6 +450,70 @@ struct ViewTraits { template class View; +template +struct is_always_assignable_impl; + +template +struct is_always_assignable_impl, + Kokkos::View> { + using mapping_type = Kokkos::Impl::ViewMapping< + typename Kokkos::View::traits, + typename Kokkos::View::traits, + typename Kokkos::View::traits::specialize>; + + constexpr static bool value = + mapping_type::is_assignable && + static_cast(Kokkos::View::rank_dynamic) >= + static_cast(Kokkos::View::rank_dynamic); +}; + +template +using is_always_assignable = is_always_assignable_impl< + typename std::remove_reference::type, + typename std::remove_const< + typename std::remove_reference::type>::type>; + +#ifdef KOKKOS_ENABLE_CXX17 +template +inline constexpr bool is_always_assignable_v = + is_always_assignable::value; +#endif + +template +constexpr bool is_assignable(const Kokkos::View& dst, + const Kokkos::View& src) { + using DstTraits = typename Kokkos::View::traits; + using SrcTraits = typename Kokkos::View::traits; + using mapping_type = + Kokkos::Impl::ViewMapping; + +#ifdef KOKKOS_ENABLE_CXX17 + return is_always_assignable_v, + Kokkos::View> || +#else + return is_always_assignable, + Kokkos::View>::value || +#endif + (mapping_type::is_assignable && + ((DstTraits::dimension::rank_dynamic >= 1) || + (dst.static_extent(0) == src.extent(0))) && + ((DstTraits::dimension::rank_dynamic >= 2) || + (dst.static_extent(1) == src.extent(1))) && + ((DstTraits::dimension::rank_dynamic >= 3) || + (dst.static_extent(2) == src.extent(2))) && + ((DstTraits::dimension::rank_dynamic >= 4) || + (dst.static_extent(3) == src.extent(3))) && + ((DstTraits::dimension::rank_dynamic >= 5) || + (dst.static_extent(4) == src.extent(4))) && + ((DstTraits::dimension::rank_dynamic >= 6) || + (dst.static_extent(5) == src.extent(5))) && + ((DstTraits::dimension::rank_dynamic >= 7) || + (dst.static_extent(6) == src.extent(6))) && + ((DstTraits::dimension::rank_dynamic >= 8) || + (dst.static_extent(7) == src.extent(7)))); +} + } /* namespace Kokkos */ //---------------------------------------------------------------------------- @@ -793,19 +857,18 @@ class View : public ViewTraits { //---------------------------------------- private: - enum { - is_layout_left = - std::is_same::value, + static constexpr bool is_layout_left = + std::is_same::value; - is_layout_right = - std::is_same::value, + static constexpr bool is_layout_right = + std::is_same::value; - is_layout_stride = std::is_same::value, + static constexpr bool is_layout_stride = + std::is_same::value; - is_default_map = std::is_same::value && - (is_layout_left || is_layout_right || is_layout_stride) - }; + static constexpr bool is_default_map = + std::is_same::value && + (is_layout_left || is_layout_right || is_layout_stride); template ::accessible> @@ -823,6 +886,8 @@ class View : public ViewTraits { #if defined(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK) +#define KOKKOS_IMPL_SINK(ARG) ARG + #define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ View::template verify_space< \ Kokkos::Impl::ActiveExecutionMemorySpace>::check(); \ @@ -830,6 +895,8 @@ class View : public ViewTraits { #else +#define KOKKOS_IMPL_SINK(ARG) + #define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(ARG) \ View::template verify_space< \ Kokkos::Impl::ActiveExecutionMemorySpace>::check(); @@ -1452,8 +1519,9 @@ class View : public ViewTraits { typename std::enable_if<(Kokkos::Impl::are_integral::value && (0 == Rank)), reference_type>::type - access(Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, args...)) + access(Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, args...))) return m_map.reference(); } @@ -1462,8 +1530,9 @@ class View : public ViewTraits { typename std::enable_if<(Kokkos::Impl::are_integral::value && (1 == Rank) && !is_default_map), reference_type>::type - access(const I0& i0, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, args...)) + access(const I0& i0, Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, args...))) return m_map.reference(i0); } @@ -1473,8 +1542,9 @@ class View : public ViewTraits { (1 == Rank) && is_default_map && !is_layout_stride), reference_type>::type - access(const I0& i0, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, args...)) + access(const I0& i0, Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, args...))) return m_map.m_impl_handle[i0]; } @@ -1484,8 +1554,9 @@ class View : public ViewTraits { (1 == Rank) && is_default_map && is_layout_stride), reference_type>::type - access(const I0& i0, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, args...)) + access(const I0& i0, Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, args...))) return m_map.m_impl_handle[m_map.m_impl_offset.m_stride.S0 * i0]; } @@ -1494,8 +1565,9 @@ class View : public ViewTraits { (Kokkos::Impl::are_integral::value && (2 == Rank) && !is_default_map), reference_type>::type - access(const I0& i0, const I1& i1, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, args...)) + access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) return m_map.reference(i0, i1); } @@ -1504,8 +1576,9 @@ class View : public ViewTraits { (Kokkos::Impl::are_integral::value && (2 == Rank) && is_default_map && is_layout_left && (traits::rank_dynamic == 0)), reference_type>::type - access(const I0& i0, const I1& i1, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, args...)) + access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_dim.N0 * i1]; } @@ -1514,8 +1587,9 @@ class View : public ViewTraits { (Kokkos::Impl::are_integral::value && (2 == Rank) && is_default_map && is_layout_left && (traits::rank_dynamic != 0)), reference_type>::type - access(const I0& i0, const I1& i1, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, args...)) + access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) return m_map.m_impl_handle[i0 + m_map.m_impl_offset.m_stride * i1]; } @@ -1524,8 +1598,9 @@ class View : public ViewTraits { (Kokkos::Impl::are_integral::value && (2 == Rank) && is_default_map && is_layout_right && (traits::rank_dynamic == 0)), reference_type>::type - access(const I0& i0, const I1& i1, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, args...)) + access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_dim.N1 * i0]; } @@ -1534,8 +1609,9 @@ class View : public ViewTraits { (Kokkos::Impl::are_integral::value && (2 == Rank) && is_default_map && is_layout_right && (traits::rank_dynamic != 0)), reference_type>::type - access(const I0& i0, const I1& i1, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, args...)) + access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) return m_map.m_impl_handle[i1 + m_map.m_impl_offset.m_stride * i0]; } @@ -1544,8 +1620,9 @@ class View : public ViewTraits { (Kokkos::Impl::are_integral::value && (2 == Rank) && is_default_map && is_layout_stride), reference_type>::type - access(const I0& i0, const I1& i1, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, args...)) + access(const I0& i0, const I1& i1, Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, args...))) return m_map.m_impl_handle[i0 * m_map.m_impl_offset.m_stride.S0 + i1 * m_map.m_impl_offset.m_stride.S1]; } @@ -1558,8 +1635,10 @@ class View : public ViewTraits { (Kokkos::Impl::are_integral::value && (3 == Rank) && is_default_map), reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2, args...)) + access(const I0& i0, const I1& i1, const I2& i2, + Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, args...))) return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2)]; } @@ -1568,8 +1647,10 @@ class View : public ViewTraits { (Kokkos::Impl::are_integral::value && (3 == Rank) && !is_default_map), reference_type>::type - access(const I0& i0, const I1& i1, const I2& i2, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2, args...)) + access(const I0& i0, const I1& i1, const I2& i2, + Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, args...))) return m_map.reference(i0, i1, i2); } @@ -1582,8 +1663,9 @@ class View : public ViewTraits { (4 == Rank) && is_default_map), reference_type>::type access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2, i3, args...)) + Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, args...))) return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3)]; } @@ -1593,8 +1675,9 @@ class View : public ViewTraits { (4 == Rank) && !is_default_map), reference_type>::type access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, - Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY((m_track, m_map, i0, i1, i2, i3, args...)) + Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, args...))) return m_map.reference(i0, i1, i2, i3); } @@ -1608,9 +1691,9 @@ class View : public ViewTraits { (5 == Rank) && is_default_map), reference_type>::type access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - Args... args) const { + Args... KOKKOS_IMPL_SINK(args)) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, args...)) + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, args...))) return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4)]; } @@ -1621,9 +1704,9 @@ class View : public ViewTraits { (5 == Rank) && !is_default_map), reference_type>::type access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - Args... args) const { + Args... KOKKOS_IMPL_SINK(args)) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, args...)) + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, args...))) return m_map.reference(i0, i1, i2, i3, i4); } @@ -1637,9 +1720,9 @@ class View : public ViewTraits { (6 == Rank) && is_default_map), reference_type>::type access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, Args... args) const { + const I5& i5, Args... KOKKOS_IMPL_SINK(args)) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, i5, args...)) + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, i5, args...))) return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4, i5)]; } @@ -1650,9 +1733,9 @@ class View : public ViewTraits { (6 == Rank) && !is_default_map), reference_type>::type access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, Args... args) const { + const I5& i5, Args... KOKKOS_IMPL_SINK(args)) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, i5, args...)) + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, i5, args...))) return m_map.reference(i0, i1, i2, i3, i4, i5); } @@ -1666,9 +1749,9 @@ class View : public ViewTraits { (7 == Rank) && is_default_map), reference_type>::type access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, const I6& i6, Args... args) const { + const I5& i5, const I6& i6, Args... KOKKOS_IMPL_SINK(args)) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, i5, i6, args...)) + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, i5, i6, args...))) return m_map.m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4, i5, i6)]; } @@ -1679,9 +1762,9 @@ class View : public ViewTraits { (7 == Rank) && !is_default_map), reference_type>::type access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, const I6& i6, Args... args) const { + const I5& i5, const I6& i6, Args... KOKKOS_IMPL_SINK(args)) const { KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, i5, i6, args...)) + KOKKOS_IMPL_SINK((m_track, m_map, i0, i1, i2, i3, i4, i5, i6, args...))) return m_map.reference(i0, i1, i2, i3, i4, i5, i6); } @@ -1696,9 +1779,10 @@ class View : public ViewTraits { (8 == Rank) && is_default_map), reference_type>::type access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, const I6& i6, const I7& i7, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, i5, i6, i7, args...)) + const I5& i5, const I6& i6, const I7& i7, + Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(KOKKOS_IMPL_SINK( + (m_track, m_map, i0, i1, i2, i3, i4, i5, i6, i7, args...))) return m_map .m_impl_handle[m_map.m_impl_offset(i0, i1, i2, i3, i4, i5, i6, i7)]; } @@ -1711,9 +1795,10 @@ class View : public ViewTraits { (8 == Rank) && !is_default_map), reference_type>::type access(const I0& i0, const I1& i1, const I2& i2, const I3& i3, const I4& i4, - const I5& i5, const I6& i6, const I7& i7, Args... args) const { - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( - (m_track, m_map, i0, i1, i2, i3, i4, i5, i6, i7, args...)) + const I5& i5, const I6& i6, const I7& i7, + Args... KOKKOS_IMPL_SINK(args)) const { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY(KOKKOS_IMPL_SINK( + (m_track, m_map, i0, i1, i2, i3, i4, i5, i6, i7, args...))) return m_map.reference(i0, i1, i2, i3, i4, i5, i6, i7); } @@ -1722,8 +1807,8 @@ class View : public ViewTraits { //---------------------------------------- // Standard destructor, constructors, and assignment operators - KOKKOS_INLINE_FUNCTION - ~View() {} + KOKKOS_DEFAULTED_FUNCTION + ~View() = default; KOKKOS_INLINE_FUNCTION View() : m_track(), m_map() {} @@ -1759,7 +1844,8 @@ class View : public ViewTraits { const View& rhs, typename std::enable_if::traits, - typename traits::specialize>::is_assignable_data_type>::type* = 0) + typename traits::specialize>::is_assignable_data_type>::type* = + nullptr) : m_track(rhs.m_track, traits::is_managed), m_map() { typedef typename View::traits SrcTraits; typedef Kokkos::Impl::ViewMapping { // If allocating in CudaUVMSpace must fence before and after // the allocation to protect against possible concurrent access // on the CPU and the GPU. - // Fence using the trait's executon space (which will be Kokkos::Cuda) - // to avoid incomplete type errors from usng Kokkos::Cuda directly. + // Fence using the trait's execution space (which will be Kokkos::Cuda) + // to avoid incomplete type errors from using Kokkos::Cuda directly. if (std::is_same::value) { typename traits::device_type::memory_space::execution_space().fence(); diff --git a/core/src/Kokkos_WorkGraphPolicy.hpp b/core/src/Kokkos_WorkGraphPolicy.hpp index 26df7e03a28..6ff2f0d4b72 100644 --- a/core/src/Kokkos_WorkGraphPolicy.hpp +++ b/core/src/Kokkos_WorkGraphPolicy.hpp @@ -199,6 +199,8 @@ class WorkGraphPolicy : public Kokkos::Impl::PolicyTraits { if (0 == count_queue[w]) push_work(w); } + execution_space space() const { return execution_space(); } + WorkGraphPolicy(const graph_type& arg_graph) : m_graph(arg_graph), m_queue(view_alloc("queue", WithoutInitializing), diff --git a/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp b/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp index d152fd62dc4..21151156e3d 100644 --- a/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp +++ b/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp @@ -149,12 +149,12 @@ void OpenMPExec::clear_thread_data() { { const int rank = omp_get_thread_num(); - if (0 != m_pool[rank]) { + if (nullptr != m_pool[rank]) { m_pool[rank]->disband_pool(); space.deallocate(m_pool[rank], old_alloc_bytes); - m_pool[rank] = 0; + m_pool[rank] = nullptr; } } /* END #pragma omp parallel */ @@ -211,7 +211,7 @@ void OpenMPExec::resize_thread_data(size_t pool_reduce_bytes, { const int rank = omp_get_thread_num(); - if (0 != m_pool[rank]) { + if (nullptr != m_pool[rank]) { m_pool[rank]->disband_pool(); space.deallocate(m_pool[rank], old_alloc_bytes); @@ -447,7 +447,7 @@ void OpenMP::impl_finalize() //---------------------------------------------------------------------------- -void OpenMP::print_configuration(std::ostream &s, const bool verbose) { +void OpenMP::print_configuration(std::ostream &s, const bool /*verbose*/) { s << "Kokkos::OpenMP"; const bool is_initialized = Impl::t_openmp_instance != nullptr; diff --git a/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp b/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp index 6a8773a58f5..6614050f025 100644 --- a/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp +++ b/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp @@ -129,8 +129,6 @@ class OpenMPExec { namespace Kokkos { -inline OpenMP::OpenMP() noexcept {} - inline #ifdef KOKKOS_ENABLE_DEPRECATED_CODE bool @@ -176,20 +174,24 @@ int OpenMP::impl_thread_pool_rank() noexcept #endif } -inline void OpenMP::impl_static_fence(OpenMP const& instance) noexcept {} +inline void OpenMP::impl_static_fence(OpenMP const& /*instance*/) noexcept {} #ifdef KOKKOS_ENABLE_DEPRECATED_CODE inline void OpenMP::fence(OpenMP const& instance) noexcept {} #endif -inline bool OpenMP::is_asynchronous(OpenMP const& instance) noexcept { +inline bool OpenMP::is_asynchronous(OpenMP const& /*instance*/) noexcept { return false; } template void OpenMP::partition_master(F const& f, int num_partitions, int partition_size) { - if (omp_get_nested()) { +#if _OPENMP >= 201811 + if (omp_get_max_active_levels() > 1) { +#else + if (omp_get_nested() > 1) { +#endif using Exec = Impl::OpenMPExec; Exec* prev_instance = Impl::t_openmp_instance; diff --git a/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp index d5dcf9fd96b..83773ac3055 100644 --- a/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp +++ b/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp @@ -389,7 +389,7 @@ class ParallelReduce, ReducerType, const ViewType& arg_view, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void*>::type = NULL) + void*>::type = nullptr) : m_instance(t_openmp_instance), m_functor(arg_functor), m_policy(arg_policy), @@ -551,7 +551,7 @@ class ParallelReduce, ReducerType, const ViewType& arg_view, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void*>::type = NULL) + void*>::type = nullptr) : m_instance(t_openmp_instance), m_functor(arg_functor), m_mdr_policy(arg_policy), @@ -930,7 +930,7 @@ class ParallelFor, if (is_dynamic) { // Must synchronize to make sure each team has set its - // partition before begining the work stealing loop. + // partition before beginning the work stealing loop. if (data.pool_rendezvous()) data.pool_rendezvous_release(); } @@ -1077,7 +1077,7 @@ class ParallelReduce, if (is_dynamic) { // Must synchronize to make sure each team has set its - // partition before begining the work stealing loop. + // partition before beginning the work stealing loop. if (data.pool_rendezvous()) data.pool_rendezvous_release(); } @@ -1146,7 +1146,7 @@ class ParallelReduce, const ViewType& arg_result, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void*>::type = NULL) + void*>::type = nullptr) : m_instance(t_openmp_instance), m_functor(arg_functor), m_policy(arg_policy), diff --git a/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/core/src/OpenMP/Kokkos_OpenMP_Task.hpp index 0cbdfbbdaa6..62f0a77d0e1 100644 --- a/core/src/OpenMP/Kokkos_OpenMP_Task.hpp +++ b/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -132,14 +132,14 @@ class TaskQueueSpecialization > { auto current_task = OptionalRef(nullptr); - while (not queue.is_done()) { + while (!queue.is_done()) { // Each team lead attempts to acquire either a thread team task // or a single thread task for the team. if (team_exec.team_rank() == 0) { // loop while both: // - the queue is not done // - the most recently popped task is a single task or empty - while (not queue.is_done()) { + while (!queue.is_done()) { current_task = queue.pop_ready_task(team_scheduler.team_scheduler_info()); diff --git a/core/src/OpenMP/Kokkos_OpenMP_Team.hpp b/core/src/OpenMP/Kokkos_OpenMP_Team.hpp index 39fd5ae0337..f54b6e2d51d 100644 --- a/core/src/OpenMP/Kokkos_OpenMP_Team.hpp +++ b/core/src/OpenMP/Kokkos_OpenMP_Team.hpp @@ -62,6 +62,11 @@ class TeamPolicyInternal typedef PolicyTraits traits; + const typename traits::execution_space& space() const { + static typename traits::execution_space m_space; + return m_space; + } + TeamPolicyInternal& operator=(const TeamPolicyInternal& p) { m_league_size = p.m_league_size; m_team_size = p.m_team_size; diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp b/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp index 2dd038dce72..ab833b03637 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp @@ -111,14 +111,6 @@ std::string SharedAllocationRecord * -SharedAllocationRecord::allocate( - const Kokkos::Experimental::OpenMPTargetSpace &arg_space, - const std::string &arg_label, const size_t arg_alloc_size) { - return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); -} - void SharedAllocationRecord::deallocate(SharedAllocationRecord *arg_rec) { @@ -151,6 +143,8 @@ SharedAllocationRecord:: header.m_label[SharedAllocationHeader::maximum_label_length - 1] = (char)0; // TODO DeepCopy // DeepCopy + Kokkos::Impl::DeepCopy( + RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader)); } //---------------------------------------------------------------------------- @@ -203,18 +197,22 @@ SharedAllocationRecord typedef SharedAllocationRecord RecordHost; - SharedAllocationHeader const *const head = - alloc_ptr ? Header::get_header(alloc_ptr) : (SharedAllocationHeader *)0; - RecordHost *const record = - head ? static_cast(head->m_record) : (RecordHost *)0; + if (alloc_ptr) { + Header head; + const Header *const head_ompt = Header::get_header(alloc_ptr); - if (!alloc_ptr || record->m_alloc_ptr != head) { - Kokkos::Impl::throw_runtime_exception(std::string( - "Kokkos::Experimental::Impl::SharedAllocationRecord< " - "Kokkos::Experimental::OpenMPTargetSpace , void >::get_record ERROR")); - } + Kokkos::Impl::DeepCopy( + &head, head_ompt, sizeof(SharedAllocationHeader)); - return record; + RecordHost *record = static_cast(head.m_record); + if (record->m_alloc_ptr == head_ompt) { + return record; + } + } + Kokkos::Impl::throw_runtime_exception(std::string( + "Kokkos::Experimental::Impl::SharedAllocationRecord< " + "Kokkos::Experimental::OpenMPTargetSpace , void >::get_record ERROR")); + return nullptr; } // Iterate records to print orphaned memory ... diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp index 140a4cb88ec..b09dbeba3a5 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp @@ -61,17 +61,8 @@ namespace { KOKKOS_INLINE_FUNCTION int kokkos_omp_in_parallel(); -int kokkos_omp_in_critical_region = - (Kokkos::HostSpace::register_in_parallel(kokkos_omp_in_parallel), 0); - KOKKOS_INLINE_FUNCTION -int kokkos_omp_in_parallel() { -#ifndef __CUDA_ARCH__ - return omp_in_parallel() && !kokkos_omp_in_critical_region; -#else - return 0; -#endif -} +int kokkos_omp_in_parallel() { return omp_in_parallel(); } bool s_using_hwloc = false; @@ -79,23 +70,9 @@ bool s_using_hwloc = false; } // namespace Impl } // namespace Kokkos -namespace Kokkos { -namespace Experimental { -bool OpenMPTarget::m_is_initialized = false; -} -} // namespace Kokkos - namespace Kokkos { namespace Impl { -// int OpenMPTargetExec::m_map_rank[ OpenMPTargetExec::MAX_THREAD_COUNT ] = { 0 -// }; - -// int OpenMPTargetExec::m_pool_topo[ 4 ] = { 0 }; - -// OpenMPTargetExec * OpenMPTargetExec::m_pool[ -// OpenMPTargetExec::MAX_THREAD_COUNT ] = { 0 }; - void OpenMPTargetExec::verify_is_process(const char* const label) { if (omp_in_parallel()) { std::string msg(label); @@ -105,28 +82,21 @@ void OpenMPTargetExec::verify_is_process(const char* const label) { } void OpenMPTargetExec::verify_initialized(const char* const label) { - if (0 == Kokkos::Experimental::OpenMPTarget::is_initialized()) { + if (0 == Kokkos::Experimental::OpenMPTarget().impl_is_initialized()) { std::string msg(label); msg.append(" ERROR: not initialized"); Kokkos::Impl::throw_runtime_exception(msg); } - - if (omp_get_max_threads() != - Kokkos::Experimental::OpenMPTarget::thread_pool_size(0)) { - std::string msg(label); - msg.append(" ERROR: Initialized but threads modified inappropriately"); - Kokkos::Impl::throw_runtime_exception(msg); - } } -void* OpenMPTargetExec::m_scratch_ptr = NULL; +void* OpenMPTargetExec::m_scratch_ptr = nullptr; int64_t OpenMPTargetExec::m_scratch_size = 0; void OpenMPTargetExec::clear_scratch() { Kokkos::Experimental::OpenMPTargetSpace space; space.deallocate(m_scratch_ptr, m_scratch_size); - m_scratch_ptr = NULL; - m_scratch_size = NULL; + m_scratch_ptr = nullptr; + m_scratch_size = 0; } void* OpenMPTargetExec::get_scratch_ptr() { return m_scratch_ptr; } @@ -151,122 +121,4 @@ void OpenMPTargetExec::resize_scratch(int64_t reduce_bytes, } // namespace Impl } // namespace Kokkos -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -//---------------------------------------------------------------------------- - -int OpenMPTarget::is_initialized() { - return m_is_initialized; -} // != Impl::OpenMPTargetExec::m_pool[0]; } - -void OpenMPTarget::initialize(unsigned thread_count, unsigned use_numa_count, - unsigned use_cores_per_numa) { - // Before any other call to OMP query the maximum number of threads - // and save the value for re-initialization unit testing. - - // Init the array for used for arbitrarily sized atomics - Kokkos::Impl::init_lock_array_host_space(); - -#ifdef KOKKOS_ENABLE_PROFILING - Kokkos::Profiling::initialize(); -#endif - m_is_initialized = true; -} - -//---------------------------------------------------------------------------- - -void OpenMPTarget::finalize() { - Kokkos::Impl::OpenMPTargetExec::verify_initialized("OpenMPTarget::finalize"); - Kokkos::Impl::OpenMPTargetExec::verify_is_process("OpenMPTarget::finalize"); - - m_is_initialized = false; - - omp_set_num_threads(1); - - if (Kokkos::Impl::s_using_hwloc && Kokkos::hwloc::can_bind_threads()) { - hwloc::unbind_this_thread(); - } - -#ifdef KOKKOS_ENABLE_PROFILING - Kokkos::Profiling::finalize(); -#endif -} - -//---------------------------------------------------------------------------- - -void OpenMPTarget::print_configuration(std::ostream& s, const bool detail) { - Kokkos::Impl::OpenMPTargetExec::verify_is_process( - "OpenMPTarget::print_configuration"); - /* - s << "Kokkos::Experimental::OpenMPTarget" ; - - #if defined( KOKKOS_ENABLE_OPENMPTARGET ) - s << " KOKKOS_ENABLE_OPENMPTARGET" ; - #endif - #if defined( KOKKOS_ENABLE_HWLOC ) - - const unsigned numa_count_ = - Kokkos::hwloc::get_available_numa_count(); const unsigned cores_per_numa = - Kokkos::hwloc::get_available_cores_per_numa(); const unsigned threads_per_core - = Kokkos::hwloc::get_available_threads_per_core(); - - s << " hwloc[" << numa_count_ << "x" << cores_per_numa << "x" << - threads_per_core << "]" - << " hwloc_binding_" << ( Impl::s_using_hwloc ? "enabled" : "disabled" ) - ; - #endif - - const bool is_initialized = 0 != Impl::OpenMPTargetExec::m_pool[0] ; - - if ( is_initialized ) { - const int numa_count = Kokkos::Impl::OpenMPTargetExec::m_pool_topo[0] - / Kokkos::Impl::OpenMPTargetExec::m_pool_topo[1] ; const int core_per_numa = - Kokkos::Impl::OpenMPTargetExec::m_pool_topo[1] / - Kokkos::Impl::OpenMPTargetExec::m_pool_topo[2] ; const int thread_per_core = - Kokkos::Impl::OpenMPTargetExec::m_pool_topo[2] ; - - s << " thread_pool_topology[ " << numa_count - << " x " << core_per_numa - << " x " << thread_per_core - << " ]" - << std::endl ; - - if ( detail ) { - std::vector< std::pair > coord( - Kokkos::Impl::OpenMPTargetExec::m_pool_topo[0] ); - - #pragma omp parallel - { - #pragma omp critical - { - coord[ omp_get_thread_num() ] = hwloc::get_this_thread_coordinate(); - } - // END #pragma omp critical - } - // END #pragma omp parallel - - for ( unsigned i = 0 ; i < coord.size() ; ++i ) { - s << " thread omp_rank[" << i << "]" - << " kokkos_rank[" << Impl::OpenMPTargetExec::m_map_rank[ i ] << "]" - << " hwloc_coord[" << coord[i].first << "." << coord[i].second << - "]" - << std::endl ; - } - } - } - else { - s << " not initialized" << std::endl ; - } - */ -} - -int OpenMPTarget::concurrency() { return thread_pool_size(0); } - -const char* OpenMPTarget::name() { return "OpenMPTarget"; } -} // namespace Experimental -} // namespace Kokkos - #endif // KOKKOS_ENABLE_OPENMPTARGET diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp index c9d8543eab0..be6ddb5ed42 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp @@ -341,19 +341,39 @@ class TeamPolicyInternal //---------------------------------------- template - inline static int team_size_max(const FunctorType&) { - return 1024; + inline static int team_size_max(const FunctorType&, const ParallelForTag&) { + return 256; } template - inline static int team_size_recommended(const FunctorType&) { + inline static int team_size_max(const FunctorType&, + const ParallelReduceTag&) { + return 256; + } + + template + inline static int team_size_max(const FunctorType&, const ReducerType&, + const ParallelReduceTag&) { return 256; } template inline static int team_size_recommended(const FunctorType&, - const int& vector_length) { - return 256 / vector_length; + const ParallelForTag&) { + return 128; + } + + template + inline static int team_size_recommended(const FunctorType&, + const ParallelReduceTag&) { + return 128; + } + + template + inline static int team_size_recommended(const FunctorType&, + const ReducerType&, + const ParallelReduceTag&) { + return 128; } //---------------------------------------- @@ -391,6 +411,10 @@ class TeamPolicyInternal team_size_ * m_thread_scratch_size[level]; } + inline Kokkos::Experimental::OpenMPTarget space() const { + return Kokkos::Experimental::OpenMPTarget(); + } + /** \brief Specify league size, request team size */ TeamPolicyInternal(typename traits::execution_space&, int league_size_request, int team_size_request, int vector_length_request = 1) @@ -540,8 +564,8 @@ class TeamPolicyInternal private: /** \brief finalize chunk_size if it was set to AUTO*/ inline void set_auto_chunk_size() { - int concurrency = - traits::execution_space::thread_pool_size(0) / m_team_alloc; + int concurrency = 2048 * 128; + if (concurrency == 0) concurrency = 1; if (m_chunk_size > 0) { @@ -568,23 +592,6 @@ class TeamPolicyInternal } // namespace Kokkos -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -inline int OpenMPTarget::thread_pool_size(int depth) { - // return Impl::OpenMPTargetExec::pool_size(depth); - return omp_get_max_threads(); -} - -KOKKOS_INLINE_FUNCTION -int OpenMPTarget::thread_pool_rank() { return omp_get_thread_num(); } - -} // namespace Experimental -} // namespace Kokkos - namespace Kokkos { template diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp new file mode 100644 index 00000000000..011bb1c8e1a --- /dev/null +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.cpp @@ -0,0 +1,50 @@ +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { +void OpenMPTargetInternal::fence() {} +int OpenMPTargetInternal::concurrency() { return 128000; } +const char* OpenMPTargetInternal::name() { return "OpenMPTarget"; } +void OpenMPTargetInternal::print_configuration(std::ostream& stream, + const bool) { + printf("Using OpenMPTarget\n"); +} + +void OpenMPTargetInternal::impl_finalize() { m_is_initialized = false; } +void OpenMPTargetInternal::impl_initialize() { m_is_initialized = true; } +int OpenMPTargetInternal::impl_is_initialized() { + return m_is_initialized ? 1 : 0; +} + +OpenMPTargetInternal* OpenMPTargetInternal::impl_singleton() { + static OpenMPTargetInternal self; + return &self; +} +} // Namespace Impl + +OpenMPTarget::OpenMPTarget() + : m_space_instance(Impl::OpenMPTargetInternal::impl_singleton()) {} + +const char* OpenMPTarget::name() { + return Impl::OpenMPTargetInternal::impl_singleton()->name(); +} +void OpenMPTarget::print_configuration(std::ostream& stream, + const bool detail) { + m_space_instance->print_configuration(stream, detail); +} + +int OpenMPTarget::concurrency() { + return Impl::OpenMPTargetInternal::impl_singleton()->concurrency(); +} +void OpenMPTarget::fence() { + Impl::OpenMPTargetInternal::impl_singleton()->fence(); +} + +void OpenMPTarget::impl_initialize() { m_space_instance->impl_initialize(); } +void OpenMPTarget::impl_finalize() { m_space_instance->impl_finalize(); } +int OpenMPTarget::impl_is_initialized() { + return Impl::OpenMPTargetInternal::impl_singleton()->impl_is_initialized(); +} +} // Namespace Experimental +} // Namespace Kokkos diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp new file mode 100644 index 00000000000..2087226d7cc --- /dev/null +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Instance.hpp @@ -0,0 +1,40 @@ +#include + +namespace Kokkos { +namespace Experimental { +namespace Impl { + +class OpenMPTargetInternal { + private: + OpenMPTargetInternal() = default; + OpenMPTargetInternal(const OpenMPTargetInternal&) = default; + OpenMPTargetInternal& operator=(const OpenMPTargetInternal&) = default; + + public: + void fence(); + + /** \brief Return the maximum amount of concurrency. */ + int concurrency(); + + //! Print configuration information to the given output stream. + void print_configuration(std::ostream&, const bool detail = false); + + static const char* name(); + + //! Free any resources being consumed by the device. + void impl_finalize(); + + //! Has been initialized + int impl_is_initialized(); + + //! Initialize, telling the CUDA run-time library which device to use. + void impl_initialize(); + + static OpenMPTargetInternal* impl_singleton(); + + private: + bool m_is_initialized = false; +}; +} // Namespace Impl +} // Namespace Experimental +} // Namespace Kokkos diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp index 2252cf0372f..d5b62f60b89 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp @@ -57,6 +57,374 @@ namespace Kokkos { namespace Impl { +template +struct OpenMPTargetReducerWrapper { + typedef typename Reducer::value_type value_type; + + KOKKOS_INLINE_FUNCTION + static void join(value_type&, const value_type&) { + printf( + "Using a generic unknown Reducer for the OpenMPTarget backend is not " + "implemented."); + } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type&, const volatile value_type&) { + printf( + "Using a generic unknown Reducer for the OpenMPTarget backend is not " + "implemented."); + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type&) { + printf( + "Using a generic unknown Reducer for the OpenMPTarget backend is not " + "implemented."); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + public: + // Required + typedef typename std::remove_cv::type value_type; + + // Required + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { dest += src; } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + dest += src; + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val = reduction_identity::sum(); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + public: + // Required + typedef typename std::remove_cv::type value_type; + + // Required + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { dest *= src; } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + dest *= src; + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val = reduction_identity::prod(); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + public: + // Required + typedef typename std::remove_cv::type value_type; + + // Required + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { + if (src < dest) dest = src; + } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + if (src < dest) dest = src; + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val = reduction_identity::min(); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + public: + // Required + typedef typename std::remove_cv::type value_type; + + // Required + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { + if (src > dest) dest = src; + } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + if (src > dest) dest = src; + } + + // Required + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val = reduction_identity::max(); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + public: + // Required + typedef typename std::remove_cv::type value_type; + + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { + dest = dest && src; + } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + dest = dest && src; + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val = reduction_identity::land(); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + public: + // Required + typedef typename std::remove_cv::type value_type; + + typedef Kokkos::View result_view_type; + + // Required + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { + dest = dest || src; + } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + dest = dest || src; + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val = reduction_identity::lor(); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + public: + // Required + typedef typename std::remove_cv::type value_type; + + // Required + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { + dest = dest & src; + } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + dest = dest & src; + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val = reduction_identity::band(); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + public: + // Required + typedef typename std::remove_cv::type value_type; + + // Required + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { + dest = dest | src; + } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + dest = dest | src; + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val = reduction_identity::bor(); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + private: + typedef typename std::remove_cv::type scalar_type; + typedef typename std::remove_cv::type index_type; + + public: + // Required + typedef ValLocScalar value_type; + + // Required + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { + if (src.val < dest.val) dest = src; + } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + if (src.val < dest.val) dest = src; + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val.val = reduction_identity::min(); + val.loc = reduction_identity::min(); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + private: + typedef typename std::remove_cv::type scalar_type; + typedef typename std::remove_cv::type index_type; + + public: + // Required + typedef ValLocScalar value_type; + + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { + if (src.val > dest.val) dest = src; + } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + if (src.val > dest.val) dest = src; + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val.val = reduction_identity::max(); + val.loc = reduction_identity::min(); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + private: + typedef typename std::remove_cv::type scalar_type; + + public: + // Required + typedef MinMaxScalar value_type; + + // Required + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { + if (src.min_val < dest.min_val) { + dest.min_val = src.min_val; + } + if (src.max_val > dest.max_val) { + dest.max_val = src.max_val; + } + } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + if (src.min_val < dest.min_val) { + dest.min_val = src.min_val; + } + if (src.max_val > dest.max_val) { + dest.max_val = src.max_val; + } + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val.max_val = reduction_identity::max(); + val.min_val = reduction_identity::min(); + } +}; + +template +struct OpenMPTargetReducerWrapper> { + private: + typedef typename std::remove_cv::type scalar_type; + typedef typename std::remove_cv::type index_type; + + public: + // Required + typedef MinMaxLocScalar value_type; + + // Required + KOKKOS_INLINE_FUNCTION + static void join(value_type& dest, const value_type& src) { + if (src.min_val < dest.min_val) { + dest.min_val = src.min_val; + dest.min_loc = src.min_loc; + } + if (src.max_val > dest.max_val) { + dest.max_val = src.max_val; + dest.max_loc = src.max_loc; + } + } + + KOKKOS_INLINE_FUNCTION + static void join(volatile value_type& dest, const volatile value_type& src) { + if (src.min_val < dest.min_val) { + dest.min_val = src.min_val; + dest.min_loc = src.min_loc; + } + if (src.max_val > dest.max_val) { + dest.max_val = src.max_val; + dest.max_loc = src.max_loc; + } + } + + KOKKOS_INLINE_FUNCTION + static void init(value_type& val) { + val.max_val = reduction_identity::max(); + val.min_val = reduction_identity::min(); + val.max_loc = reduction_identity::min(); + val.min_loc = reduction_identity::min(); + } +}; +/* +template +class OpenMPTargetReducerWrapper { + public: + const ReducerType& reducer; + typedef typename ReducerType::value_type value_type; + value_type& value; + + KOKKOS_INLINE_FUNCTION + void join(const value_type& upd) { + reducer.join(value,upd); + } + + KOKKOS_INLINE_FUNCTION + void init(const value_type& upd) { + reducer.init(value,upd); + } +};*/ + +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Impl { + template class ParallelFor, Kokkos::Experimental::OpenMPTarget> { @@ -71,7 +439,21 @@ class ParallelFor, public: inline void execute() const { execute_impl(); } - + /* + template + inline typename std::enable_if::value>::type + execute_impl() const { + OpenMPTargetExec::verify_is_process( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + const typename Policy::member_type begin = m_policy.begin(); + const typename Policy::member_type end = m_policy.end(); + + #pragma omp target teams distribute parallel for map(to: this->m_functor) + for (int i = begin; i < end; i++) m_functor(i); + } + */ template inline typename std::enable_if::value>::type execute_impl() const { @@ -82,8 +464,12 @@ class ParallelFor, const typename Policy::member_type begin = m_policy.begin(); const typename Policy::member_type end = m_policy.end(); -#pragma omp target teams distribute parallel for map(to : this->m_functor) - for (int i = begin; i < end; i++) m_functor(i); + if (end <= begin) return; + + FunctorType a_functor(m_functor); + +#pragma omp target teams distribute parallel for map(to : a_functor) + for (int i = begin; i < end; i++) a_functor(i); } template @@ -96,10 +482,13 @@ class ParallelFor, const typename Policy::member_type begin = m_policy.begin(); const typename Policy::member_type end = m_policy.end(); + if (end <= begin) return; + + FunctorType a_functor(m_functor); #pragma omp target teams distribute parallel for num_threads(128) \ map(to \ - : this->m_functor) - for (int i = begin; i < end; i++) m_functor(TagType(), i); + : a_functor) + for (int i = begin; i < end; i++) a_functor(TagType(), i); } inline ParallelFor(const FunctorType& arg_functor, Policy arg_policy) @@ -143,8 +532,11 @@ struct ParallelReduceSpecialize, const typename PolicyType::member_type begin = p.begin(); const typename PolicyType::member_type end = p.end(); + if (end <= begin) return; + ValueType result = ValueType(); -#pragma omp target teams distribute parallel for num_teams(512) map(to:f) map(tofrom:result) reduction(+: result) +#pragma omp target teams distribute parallel for num_teams(512) \ + map(to:f) map(tofrom:result) reduction(+: result) for (int i = begin; i < end; i++) f(i, result); *result_ptr = result; @@ -162,8 +554,12 @@ struct ParallelReduceSpecialize, const typename PolicyType::member_type begin = p.begin(); const typename PolicyType::member_type end = p.end(); + if (end <= begin) return; + ValueType result = ValueType(); -#pragma omp target teams distribute parallel for num_teams(512) map(to:f) map(tofrom: result) reduction(+: result) +#pragma omp target teams distribute parallel for \ + num_teams(512) map(to:f) map(tofrom: result) \ + reduction(+: result) for (int i = begin; i < end; i++) f(TagType(), i, result); *result_ptr = result; @@ -174,62 +570,73 @@ struct ParallelReduceSpecialize, execute_impl(f, p, ptr); } }; -/* -template struct ParallelReduceSpecialize { - #pragma omp declare reduction(custom: ValueType : ReducerType::join(omp_out, -omp_in)) initializer ( ReducerType::init(omp_priv) ) +template +struct ParallelReduceSpecialize { +#pragma omp declare reduction( \ + custom:ValueType \ + : OpenMPTargetReducerWrapper ::join(omp_out, omp_in)) \ + initializer(OpenMPTargetReducerWrapper ::init(omp_priv)) - template< class TagType > + template inline static - typename std::enable_if< std::is_same< TagType , void >::value >::type - execute_impl(const FunctorType& f, const PolicyType& p, PointerType -result_ptr) - { - OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget -parallel_for"); - OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget -parallel_for"); const typename PolicyType::member_type begin = p.begin(); const -typename PolicyType::member_type end = p.end(); - - ValueType result = ValueType(); - #pragma omp target teams distribute parallel for num_teams(512) map(to:f) -map(tofrom:result) reduction(custom: result) for(int i=begin; i::value>::type + execute_impl(const FunctorType& f, const PolicyType& p, + PointerType result_ptr) { + OpenMPTargetExec::verify_is_process( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + const typename PolicyType::member_type begin = p.begin(); + const typename PolicyType::member_type end = p.end(); + if (end <= begin) return; - template< class TagType > + ValueType result = ValueType(); + OpenMPTargetReducerWrapper::init(result); + +// clang-format off +#pragma omp target teams distribute parallel for num_teams(512) map(to: f) \ + map(tofrom: result) reduction(custom: result) + for (int i = begin; i < end; i++) f(i, result); + // clang-format on + + *result_ptr = result; + } + + template inline static - typename std::enable_if< ! std::is_same< TagType , void >::value >::type - execute_impl(const FunctorType& f, const PolicyType& p, PointerType -result_ptr) - { - OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget -parallel_for"); - OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget -parallel_for"); const typename PolicyType::member_type begin = p.begin(); const -typename PolicyType::member_type end = p.end(); - - ValueType result = ValueType(); - #pragma omp target teams distribute parallel for num_teams(512) map(to:f) -map(tofrom: result) reduction(custom: result) for(int i=begin; i::value>::type + execute_impl(const FunctorType& f, const PolicyType& p, + PointerType result_ptr) { + OpenMPTargetExec::verify_is_process( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + const typename PolicyType::member_type begin = p.begin(); + const typename PolicyType::member_type end = p.end(); + if (end <= begin) return; - inline static - void execute(const FunctorType& f, const PolicyType& p, PointerType ptr) { - execute_impl(f,p,ptr); - } + ValueType result = ValueType(); + OpenMPTargetReducerWrapper::init(result); + +// clang-format off +#pragma omp target teams distribute parallel for num_teams(512) map(to: f) \ + map(tofrom: result) reduction(custom: result) + for (int i = begin; i < end; i++) f(TagType(), i, result); +//clang format on + + *result_ptr = result; + } + + inline static void execute(const FunctorType& f, const PolicyType& p, + PointerType ptr) { + execute_impl(f, p, ptr); + } }; -*/ template class ParallelReduce, ReducerType, @@ -265,7 +672,7 @@ class ParallelReduce, ReducerType, typedef ParallelReduceSpecialize< FunctorType, Policy, ReducerType, pointer_type, typename ValueTraits::value_type, HasJoin, UseReducer> - ParForSpecialize; + ParReduceSpecialize; const FunctorType m_functor; const Policy m_policy; @@ -274,7 +681,7 @@ class ParallelReduce, ReducerType, public: inline void execute() const { - ParForSpecialize::execute(m_functor, m_policy, m_result_ptr); + ParReduceSpecialize::execute(m_functor, m_policy, m_result_ptr); } template @@ -283,28 +690,18 @@ class ParallelReduce, ReducerType, const ViewType& arg_result_view, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void*>::type = NULL) + void*>::type = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), - m_result_ptr(arg_result_view.data()) { - /*static_assert( std::is_same< typename ViewType::memory_space - , Kokkos::HostSpace >::value - , "Reduction result on Kokkos::Experimental::OpenMPTarget must be a - Kokkos::View in HostSpace" );*/ - } + m_result_ptr(arg_result_view.data()) {} inline ParallelReduce(const FunctorType& arg_functor, Policy arg_policy, const ReducerType& reducer) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(reducer), - m_result_ptr(reducer.result_view().data()) { - /*static_assert( std::is_same< typename ViewType::memory_space - , Kokkos::HostSpace >::value - , "Reduction result on Kokkos::Experimental::OpenMPTarget must be a - Kokkos::View in HostSpace" );*/ - } + m_result_ptr(reducer.view().data()) {} }; } // namespace Impl @@ -488,10 +885,11 @@ class ParallelFor, 0, 0); void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); + FunctorType a_functor(m_functor); #pragma omp target teams distribute parallel for num_teams(league_size) \ num_threads(team_size* vector_length) schedule(static, 1) \ map(to \ - : this->m_functor, scratch_ptr) + : a_functor, scratch_ptr) for (int i = 0; i < league_size * team_size * vector_length; i++) { typename Policy::member_type team(i / (team_size * vector_length), league_size, team_size, vector_length, @@ -513,6 +911,7 @@ class ParallelFor, const int nteams = OpenMPTargetExec::MAX_ACTIVE_TEAMS < league_size ? OpenMPTargetExec::MAX_ACTIVE_TEAMS : league_size; + FunctorType a_functor(m_functor); OpenMPTargetExec::resize_scratch(0, Policy::member_type::TEAM_REDUCE_SIZE, 0, 0); @@ -520,7 +919,7 @@ class ParallelFor, #pragma omp target teams distribute parallel for num_teams(league_size) \ num_threads(team_size* vector_length) schedule(static, 1) \ map(to \ - : this->m_functor, scratch_ptr) + : a_functor, scratch_ptr) for (int i = 0; i < league_size; i++) { typename Policy::member_type team(i / (team_size * vector_length), league_size, team_size, vector_length, @@ -663,7 +1062,7 @@ class ParallelReduce, public: inline void execute() const { - ParForSpecialize::execute(m_functor, m_policy, m_result_ptr); + // ParForSpecialize::execute(m_functor, m_policy, m_result_ptr); } template @@ -672,11 +1071,11 @@ class ParallelReduce, const ViewType& arg_result, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void*>::type = NULL) + void*>::type = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), - m_result_ptr(arg_result.ptr_on_device()), + m_result_ptr(arg_result.data()), m_shmem_size(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize::value( arg_functor, arg_policy.team_size())) {} @@ -686,7 +1085,7 @@ class ParallelReduce, : m_functor(arg_functor), m_policy(arg_policy), m_reducer(reducer), - m_result_ptr(reducer.result_view().data()), + m_result_ptr(reducer.view().data()), m_shmem_size(arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize::value( arg_functor, arg_policy.team_size())) { diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp new file mode 100644 index 00000000000..4ce2dee122e --- /dev/null +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel_MDRange.hpp @@ -0,0 +1,549 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENMPTARGET_PARALLEL_MDRANGE_HPP +#define KOKKOS_OPENMPTARGET_PARALLEL_MDRANGE_HPP + +#include +#include +#include +#include +#include + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template +class ParallelFor, + Kokkos::Experimental::OpenMPTarget> { + private: + typedef Kokkos::MDRangePolicy Policy; + typedef typename Policy::work_tag WorkTag; + typedef typename Policy::member_type Member; + + const FunctorType m_functor; + const Policy m_policy; + + public: + inline void execute() const { + OpenMPTargetExec::verify_is_process( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + const int64_t begin = 0; + const int64_t end = m_policy.m_num_tiles; + FunctorType functor(m_functor); + Policy policy = m_policy; +#pragma omp target teams distribute map(to : functor) num_teams(end - begin) + { + for (ptrdiff_t tile_idx = begin; tile_idx < end; tile_idx++) { +#pragma omp parallel + { + typename Policy::point_type offset; + if (Policy::outer_direction == Policy::Left) { + for (int i = 0; i < Policy::rank; ++i) { + offset[i] = (tile_idx % policy.m_tile_end[i]) * policy.m_tile[i] + + policy.m_lower[i]; + tile_idx /= policy.m_tile_end[i]; + } + } else { + for (int i = Policy::rank - 1; i >= 0; --i) { + offset[i] = (tile_idx % policy.m_tile_end[i]) * policy.m_tile[i] + + policy.m_lower[i]; + tile_idx /= policy.m_tile_end[i]; + } + } + execute_tile(offset, functor, policy); + } + } + } + } + + template + inline typename std::enable_if::type execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy) const { + const ptrdiff_t begin_0 = offset[0]; + ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; + end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; +#pragma omp for + for (ptrdiff_t i0 = begin_0; i0 < end_0; i0++) { + functor(i0); + } + } + + template + inline typename std::enable_if::type execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy) const { + const ptrdiff_t begin_0 = offset[0]; + ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; + end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; + + const ptrdiff_t begin_1 = offset[1]; + ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; + end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; + +#pragma omp for collapse(2) + for (ptrdiff_t i0 = begin_0; i0 < end_0; i0++) + for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) functor(i0, i1); + } + + template + inline typename std::enable_if::type execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy) const { + const ptrdiff_t begin_0 = offset[0]; + ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; + end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; + + const ptrdiff_t begin_1 = offset[1]; + ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; + end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; + + const ptrdiff_t begin_2 = offset[2]; + ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; + end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; + +#pragma omp for collapse(3) + for (ptrdiff_t i0 = begin_0; i0 < end_0; i0++) + for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) + for (ptrdiff_t i2 = begin_2; i2 < end_2; i2++) functor(i0, i1, i2); + } + + template + inline typename std::enable_if::type execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy) const { + const ptrdiff_t begin_0 = offset[0]; + ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; + end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; + + const ptrdiff_t begin_1 = offset[1]; + ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; + end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; + + const ptrdiff_t begin_2 = offset[2]; + ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; + end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; + + const ptrdiff_t begin_3 = offset[3]; + ptrdiff_t end_3 = begin_3 + policy.m_tile[3]; + end_3 = end_3 < policy.m_upper[3] ? end_3 : policy.m_upper[3]; + +#pragma omp for collapse(4) + for (ptrdiff_t i0 = begin_0; i0 < end_0; i0++) + for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) + for (ptrdiff_t i2 = begin_2; i2 < end_2; i2++) + for (ptrdiff_t i3 = begin_3; i3 < end_3; i3++) + functor(i0, i1, i2, i3); + } + + template + inline typename std::enable_if::type execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy) const { + const ptrdiff_t begin_0 = offset[0]; + ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; + end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; + + const ptrdiff_t begin_1 = offset[1]; + ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; + end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; + + const ptrdiff_t begin_2 = offset[2]; + ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; + end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; + + const ptrdiff_t begin_3 = offset[3]; + ptrdiff_t end_3 = begin_3 + policy.m_tile[3]; + end_3 = end_3 < policy.m_upper[3] ? end_3 : policy.m_upper[3]; + + const ptrdiff_t begin_4 = offset[4]; + ptrdiff_t end_4 = begin_4 + policy.m_tile[4]; + end_4 = end_4 < policy.m_upper[4] ? end_4 : policy.m_upper[4]; + +#pragma omp for collapse(5) + for (ptrdiff_t i0 = begin_0; i0 < end_0; i0++) + for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) + for (ptrdiff_t i2 = begin_2; i2 < end_2; i2++) + for (ptrdiff_t i3 = begin_3; i3 < end_3; i3++) + for (ptrdiff_t i4 = begin_4; i4 < end_4; i4++) + functor(i0, i1, i2, i3, i4); + } + + template + inline typename std::enable_if::type execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy) const { + const ptrdiff_t begin_0 = offset[0]; + ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; + end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; + + const ptrdiff_t begin_1 = offset[1]; + ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; + end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; + + const ptrdiff_t begin_2 = offset[2]; + ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; + end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; + + const ptrdiff_t begin_3 = offset[3]; + ptrdiff_t end_3 = begin_3 + policy.m_tile[3]; + end_3 = end_3 < policy.m_upper[3] ? end_3 : policy.m_upper[3]; + + const ptrdiff_t begin_4 = offset[4]; + ptrdiff_t end_4 = begin_4 + policy.m_tile[4]; + end_4 = end_4 < policy.m_upper[4] ? end_4 : policy.m_upper[4]; + + const ptrdiff_t begin_5 = offset[5]; + ptrdiff_t end_5 = begin_5 + policy.m_tile[5]; + end_5 = end_5 < policy.m_upper[5] ? end_5 : policy.m_upper[5]; + +#pragma omp for collapse(6) + for (ptrdiff_t i0 = begin_0; i0 < end_0; i0++) + for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) + for (ptrdiff_t i2 = begin_2; i2 < end_2; i2++) + for (ptrdiff_t i3 = begin_3; i3 < end_3; i3++) + for (ptrdiff_t i4 = begin_4; i4 < end_4; i4++) + for (ptrdiff_t i5 = begin_5; i5 < end_5; i5++) + functor(i0, i1, i2, i3, i4, i5); + } + + template + inline typename std::enable_if::type execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy) const { + const ptrdiff_t begin_0 = offset[0]; + ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; + end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; + + const ptrdiff_t begin_1 = offset[1]; + ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; + end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; + + const ptrdiff_t begin_2 = offset[2]; + ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; + end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; + + const ptrdiff_t begin_3 = offset[3]; + ptrdiff_t end_3 = begin_3 + policy.m_tile[3]; + end_3 = end_3 < policy.m_upper[3] ? end_3 : policy.m_upper[3]; + + const ptrdiff_t begin_4 = offset[4]; + ptrdiff_t end_4 = begin_4 + policy.m_tile[4]; + end_4 = end_4 < policy.m_upper[4] ? end_4 : policy.m_upper[4]; + + const ptrdiff_t begin_5 = offset[5]; + ptrdiff_t end_5 = begin_5 + policy.m_tile[5]; + end_5 = end_5 < policy.m_upper[5] ? end_5 : policy.m_upper[5]; + + const ptrdiff_t begin_6 = offset[6]; + ptrdiff_t end_6 = begin_6 + policy.m_tile[6]; + end_6 = end_6 < policy.m_upper[6] ? end_6 : policy.m_upper[6]; + +#pragma omp for collapse(7) + for (ptrdiff_t i0 = begin_0; i0 < end_0; i0++) + for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) + for (ptrdiff_t i2 = begin_2; i2 < end_2; i2++) + for (ptrdiff_t i3 = begin_3; i3 < end_3; i3++) + for (ptrdiff_t i4 = begin_4; i4 < end_4; i4++) + for (ptrdiff_t i5 = begin_5; i5 < end_5; i5++) + for (ptrdiff_t i6 = begin_6; i6 < end_6; i6++) + functor(i0, i1, i2, i3, i4, i5, i6); + } + + template + inline typename std::enable_if::type execute_tile( + typename Policy::point_type offset, const FunctorType& functor, + const Policy& policy) const { + const ptrdiff_t begin_0 = offset[0]; + ptrdiff_t end_0 = begin_0 + policy.m_tile[0]; + end_0 = end_0 < policy.m_upper[0] ? end_0 : policy.m_upper[0]; + + const ptrdiff_t begin_1 = offset[1]; + ptrdiff_t end_1 = begin_1 + policy.m_tile[1]; + end_1 = end_1 < policy.m_upper[1] ? end_1 : policy.m_upper[1]; + + const ptrdiff_t begin_2 = offset[2]; + ptrdiff_t end_2 = begin_2 + policy.m_tile[2]; + end_2 = end_2 < policy.m_upper[2] ? end_2 : policy.m_upper[2]; + + const ptrdiff_t begin_3 = offset[3]; + ptrdiff_t end_3 = begin_3 + policy.m_tile[3]; + end_3 = end_3 < policy.m_upper[3] ? end_3 : policy.m_upper[3]; + + const ptrdiff_t begin_4 = offset[4]; + ptrdiff_t end_4 = begin_4 + policy.m_tile[4]; + end_4 = end_4 < policy.m_upper[4] ? end_4 : policy.m_upper[4]; + + const ptrdiff_t begin_5 = offset[5]; + ptrdiff_t end_5 = begin_5 + policy.m_tile[5]; + end_5 = end_5 < policy.m_upper[5] ? end_5 : policy.m_upper[5]; + + const ptrdiff_t begin_6 = offset[6]; + ptrdiff_t end_6 = begin_6 + policy.m_tile[6]; + end_6 = end_6 < policy.m_upper[6] ? end_6 : policy.m_upper[6]; + + const ptrdiff_t begin_7 = offset[7]; + ptrdiff_t end_7 = begin_7 + policy.m_tile[7]; + end_7 = end_7 < policy.m_upper[7] ? end_7 : policy.m_upper[7]; + +#pragma omp for collapse(8) + for (ptrdiff_t i0 = begin_0; i0 < end_0; i0++) + for (ptrdiff_t i1 = begin_1; i1 < end_1; i1++) + for (ptrdiff_t i2 = begin_2; i2 < end_2; i2++) + for (ptrdiff_t i3 = begin_3; i3 < end_3; i3++) + for (ptrdiff_t i4 = begin_4; i4 < end_4; i4++) + for (ptrdiff_t i5 = begin_5; i5 < end_5; i5++) + for (ptrdiff_t i6 = begin_6; i6 < end_6; i6++) + for (ptrdiff_t i7 = begin_7; i7 < end_7; i7++) + functor(i0, i1, i2, i3, i4, i5, i6, i7); + } + + inline ParallelFor(const FunctorType& arg_functor, Policy arg_policy) + : m_functor(arg_functor), m_policy(arg_policy) {} +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template +struct ParallelReduceSpecialize, + ReducerType, PointerType, ValueType, 0, 0> { + typedef Kokkos::RangePolicy PolicyType; + template + inline static + typename std::enable_if::value>::type + execute_impl(const FunctorType& f, const PolicyType& p, + PointerType result_ptr) { + OpenMPTargetExec::verify_is_process( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + const typename PolicyType::member_type begin = p.begin(); + const typename PolicyType::member_type end = p.end(); + + ValueType result = ValueType(); +#pragma omp target teams distribute parallel for num_teams(512) map(to:f) map(tofrom:result) reduction(+: result) + for (int i = begin; i < end; i++) f(i, result); + + *result_ptr = result; + } + + template + inline static + typename std::enable_if::value>::type + execute_impl(const FunctorType& f, const PolicyType& p, + PointerType result_ptr) { + OpenMPTargetExec::verify_is_process( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized( + "Kokkos::Experimental::OpenMPTarget parallel_for"); + const typename PolicyType::member_type begin = p.begin(); + const typename PolicyType::member_type end = p.end(); + + ValueType result = ValueType(); +#pragma omp target teams distribute parallel for num_teams(512) map(to:f) map(tofrom: result) reduction(+: result) + for (int i = begin; i < end; i++) f(TagType(), i, result); + + *result_ptr = result; + } + + inline static void execute(const FunctorType& f, const PolicyType& p, + PointerType ptr) { + execute_impl(f, p, ptr); + } +}; +/* +template struct ParallelReduceSpecialize { + + #pragma omp declare reduction(custom: ValueType : ReducerType::join(omp_out, +omp_in)) initializer ( ReducerType::init(omp_priv) ) + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + execute_impl(const FunctorType& f, const PolicyType& p, PointerType +result_ptr) + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget +parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget +parallel_for"); const typename PolicyType::member_type begin = p.begin(); const +typename PolicyType::member_type end = p.end(); + + ValueType result = ValueType(); + #pragma omp target teams distribute parallel for num_teams(512) map(to:f) +map(tofrom:result) reduction(custom: result) for(int i=begin; i + inline static + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + execute_impl(const FunctorType& f, const PolicyType& p, PointerType +result_ptr) + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget +parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget +parallel_for"); const typename PolicyType::member_type begin = p.begin(); const +typename PolicyType::member_type end = p.end(); + + ValueType result = ValueType(); + #pragma omp target teams distribute parallel for num_teams(512) map(to:f) +map(tofrom: result) reduction(custom: result) for(int i=begin; i(f,p,ptr); + } +}; + + +template +class ParallelReduce, ReducerType, + Kokkos::Experimental::OpenMPTarget> { + private: + typedef Kokkos::MDRangePolicy Policy; + + typedef typename Policy::work_tag WorkTag; + typedef typename Policy::WorkRange WorkRange; + typedef typename Policy::member_type Member; + + typedef Kokkos::Impl::if_c::value, + FunctorType, ReducerType> + ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + typedef + typename Kokkos::Impl::if_c::value, + WorkTag, void>::type WorkTagFwd; + + // Static Assert WorkTag void if ReducerType not InvalidType + + typedef Kokkos::Impl::FunctorValueTraits + ValueTraits; + typedef Kokkos::Impl::FunctorValueInit ValueInit; + typedef Kokkos::Impl::FunctorValueJoin ValueJoin; + + enum { HasJoin = ReduceFunctorHasJoin::value }; + enum { UseReducer = is_reducer_type::value }; + + typedef typename ValueTraits::pointer_type pointer_type; + typedef typename ValueTraits::reference_type reference_type; + + typedef ParallelReduceSpecialize< + FunctorType, Policy, ReducerType, pointer_type, + typename ValueTraits::value_type, HasJoin, UseReducer> + ParForSpecialize; + + const FunctorType m_functor; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + + public: + inline void execute() const { + ParForSpecialize::execute(m_functor, m_policy, m_result_ptr); + } + + template + inline ParallelReduce( + const FunctorType& arg_functor, Policy arg_policy, + const ViewType& arg_result_view, + typename std::enable_if::value && + !Kokkos::is_reducer_type::value, + void*>::type = NULL) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(InvalidType()), + m_result_ptr(arg_result_view.data()) { + //static_assert( std::is_same< typename ViewType::memory_space + // , Kokkos::HostSpace >::value + // , "Reduction result on Kokkos::Experimental::OpenMPTarget must be a + // Kokkos::View in HostSpace" ); + } + + inline ParallelReduce(const FunctorType& arg_functor, Policy arg_policy, + const ReducerType& reducer) + : m_functor(arg_functor), + m_policy(arg_policy), + m_reducer(reducer), + m_result_ptr(reducer.view().data()) { + //static_assert( std::is_same< typename ViewType::memory_space + // , Kokkos::HostSpace >::value + // , "Reduction result on Kokkos::Experimental::OpenMPTarget must be a + // Kokkos::View in HostSpace" ); + } +};*/ + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* KOKKOS_OPENMPTARGET_PARALLEL_HPP */ diff --git a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp index a40ec19e798..19956dad2e7 100644 --- a/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp +++ b/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp @@ -113,7 +113,7 @@ class TaskExec { public: #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) void* team_shared() const { - return m_team_exec ? m_team_exec->scratch_thread() : (void*)0; + return m_team_exec ? m_team_exec->scratch_thread() : nullptr; } int team_shared_size() const { diff --git a/core/src/Qthreads/Kokkos_QthreadsExec.cpp b/core/src/Qthreads/Kokkos_QthreadsExec.cpp deleted file mode 100644 index 4372f3ce807..00000000000 --- a/core/src/Qthreads/Kokkos_QthreadsExec.cpp +++ /dev/null @@ -1,535 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#if defined(KOKKOS_ENABLE_QTHREADS) - -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - -// Defines to enable experimental Qthreads functionality. -//#define QTHREAD_LOCAL_PRIORITY -//#define CLONED_TASKS - -//#include - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -namespace Impl { - -namespace { - -enum { MAXIMUM_QTHREADS_WORKERS = 1024 }; - -/** s_exec is indexed by the reverse rank of the workers - * for faster fan-in / fan-out lookups - * [ n - 1, n - 2, ..., 0 ] - */ -QthreadsExec *s_exec[MAXIMUM_QTHREADS_WORKERS]; - -int s_number_shepherds = 0; -int s_number_workers_per_shepherd = 0; -int s_number_workers = 0; - -inline QthreadsExec **worker_exec() { - return s_exec + s_number_workers - - (qthread_shep() * s_number_workers_per_shepherd + - qthread_worker_local(NULL) + 1); -} - -const int s_base_size = QthreadsExec::align_alloc(sizeof(QthreadsExec)); - -int s_worker_reduce_end = 0; // End of worker reduction memory. -int s_worker_shared_end = 0; // Total of worker scratch memory. -int s_worker_shared_begin = 0; // Beginning of worker shared memory. - -QthreadsExecFunctionPointer volatile s_active_function = 0; -const void *volatile s_active_function_arg = 0; - -} // namespace - -} // namespace Impl - -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -int Qthreads::is_initialized() { return Impl::s_number_workers != 0; } - -int Qthreads::concurrency() { return Impl::s_number_workers_per_shepherd; } - -int Qthreads::in_parallel() { return Impl::s_active_function != 0; } - -void Qthreads::initialize(int thread_count) { - // Environment variable: QTHREAD_NUM_SHEPHERDS - // Environment variable: QTHREAD_NUM_WORKERS_PER_SHEP - // Environment variable: QTHREAD_HWPAR - - { - char buffer[256]; - snprintf(buffer, sizeof(buffer), "QTHREAD_HWPAR=%d", thread_count); - putenv(buffer); - } - - const bool ok_init = - (QTHREAD_SUCCESS == qthread_initialize()) && - (thread_count == - qthread_num_shepherds() * qthread_num_workers_local(NO_SHEPHERD)) && - (thread_count == qthread_num_workers()); - - bool ok_symmetry = true; - - if (ok_init) { - Impl::s_number_shepherds = qthread_num_shepherds(); - Impl::s_number_workers_per_shepherd = - qthread_num_workers_local(NO_SHEPHERD); - Impl::s_number_workers = - Impl::s_number_shepherds * Impl::s_number_workers_per_shepherd; - - for (int i = 0; ok_symmetry && i < Impl::s_number_shepherds; ++i) { - ok_symmetry = - (Impl::s_number_workers_per_shepherd == qthread_num_workers_local(i)); - } - } - - if (!ok_init || !ok_symmetry) { - std::ostringstream msg; - - msg << "Kokkos::Qthreads::initialize(" << thread_count << ") FAILED"; - msg << " : qthread_num_shepherds = " << qthread_num_shepherds(); - msg << " : qthread_num_workers_per_shepherd = " - << qthread_num_workers_local(NO_SHEPHERD); - msg << " : qthread_num_workers = " << qthread_num_workers(); - - if (!ok_symmetry) { - msg << " : qthread_num_workers_local = {"; - for (int i = 0; i < Impl::s_number_shepherds; ++i) { - msg << " " << qthread_num_workers_local(i); - } - msg << " }"; - } - - Impl::s_number_workers = 0; - Impl::s_number_shepherds = 0; - Impl::s_number_workers_per_shepherd = 0; - - if (ok_init) { - qthread_finalize(); - } - - Kokkos::Impl::throw_runtime_exception(msg.str()); - } - - Impl::QthreadsExec::resize_worker_scratch(256, 256); - - // Init the array for used for arbitrarily sized atomics. - Impl::init_lock_array_host_space(); -} - -void Qthreads::finalize() { - Impl::QthreadsExec::clear_workers(); - - if (Impl::s_number_workers) { - qthread_finalize(); - } - - Impl::s_number_workers = 0; - Impl::s_number_shepherds = 0; - Impl::s_number_workers_per_shepherd = 0; -} - -void Qthreads::print_configuration(std::ostream &s, const bool detail) { - s << "Kokkos::Qthreads {" - << " num_shepherds(" << Impl::s_number_shepherds << ")" - << " num_workers_per_shepherd(" << Impl::s_number_workers_per_shepherd - << ")" - << " }" << std::endl; -} - -Qthreads &Qthreads::instance(int) { - static Qthreads q; - return q; -} - -void Qthreads::fence() {} - -int Qthreads::shepherd_size() const { return Impl::s_number_shepherds; } -int Qthreads::shepherd_worker_size() const { - return Impl::s_number_workers_per_shepherd; -} - -const char *Qthreads::name() { return "Qthreads"; } - -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -namespace Impl { - -namespace { - -aligned_t driver_exec_all(void *arg) { - QthreadsExec &exec = **worker_exec(); - - (*s_active_function)(exec, s_active_function_arg); - - /* - fprintf( stdout - , "QthreadsExec driver worker(%d:%d) shepherd(%d:%d) - shepherd_worker(%d:%d) done\n" , exec.worker_rank() , exec.worker_size() , - exec.shepherd_rank() , exec.shepherd_size() , exec.shepherd_worker_rank() , - exec.shepherd_worker_size() - ); - fflush(stdout); - */ - - return 0; -} - -aligned_t driver_resize_worker_scratch(void *arg) { - static volatile int lock_begin = 0; - static volatile int lock_end = 0; - - QthreadsExec **const exec = worker_exec(); - - //---------------------------------------- - // Serialize allocation for thread safety. - - while (!atomic_compare_exchange_strong(&lock_begin, 0, 1)) - ; // Spin wait to claim lock. - - const bool ok = 0 == *exec; - - if (ok) { - *exec = (QthreadsExec *)malloc(s_base_size + s_worker_shared_end); - } - - lock_begin = 0; // Release lock. - - if (ok) { - new (*exec) QthreadsExec(); - } - - //---------------------------------------- - // Wait for all calls to complete to insure that each worker has executed. - - if (s_number_workers == 1 + atomic_fetch_add(&lock_end, 1)) { - lock_end = 0; - } - - while (lock_end) - ; - - /* - fprintf( stdout - , "QthreadsExec resize worker(%d:%d) shepherd(%d:%d) - shepherd_worker(%d:%d) done\n" , (**exec).worker_rank() , - (**exec).worker_size() , (**exec).shepherd_rank() , (**exec).shepherd_size() - , (**exec).shepherd_worker_rank() - , (**exec).shepherd_worker_size() - ); - fflush(stdout); - */ - - //---------------------------------------- - - if (!ok) { - fprintf(stderr, "Kokkos::QthreadsExec resize failed\n"); - fflush(stderr); - } - - return 0; -} - -void verify_is_process(const char *const label, bool not_active = false) { - const bool not_process = - 0 != qthread_shep() || 0 != qthread_worker_local(NULL); - const bool is_active = - not_active && (s_active_function || s_active_function_arg); - - if (not_process || is_active) { - std::string msg(label); - msg.append(" : FAILED"); - if (not_process) msg.append(" : not called by main process"); - if (is_active) msg.append(" : parallel execution in progress"); - Kokkos::Impl::throw_runtime_exception(msg); - } -} - -} // namespace - -int QthreadsExec::worker_per_shepherd() { - return s_number_workers_per_shepherd; -} - -QthreadsExec::QthreadsExec() { - const int shepherd_rank = qthread_shep(); - const int shepherd_worker_rank = qthread_worker_local(NULL); - const int worker_rank = - shepherd_rank * s_number_workers_per_shepherd + shepherd_worker_rank; - - m_worker_base = s_exec; - m_shepherd_base = s_exec + s_number_workers_per_shepherd * - ((s_number_shepherds - (shepherd_rank + 1))); - m_scratch_alloc = ((unsigned char *)this) + s_base_size; - m_reduce_end = s_worker_reduce_end; - m_shepherd_rank = shepherd_rank; - m_shepherd_size = s_number_shepherds; - m_shepherd_worker_rank = shepherd_worker_rank; - m_shepherd_worker_size = s_number_workers_per_shepherd; - m_worker_rank = worker_rank; - m_worker_size = s_number_workers; - m_worker_state = QthreadsExec::Active; -} - -void QthreadsExec::clear_workers() { - for (int iwork = 0; iwork < s_number_workers; ++iwork) { - QthreadsExec *const exec = s_exec[iwork]; - s_exec[iwork] = 0; - free(exec); - } -} - -void QthreadsExec::shared_reset(Qthreads::scratch_memory_space &space) { - new (&space) Qthreads::scratch_memory_space( - ((unsigned char *)(**m_shepherd_base).m_scratch_alloc) + - s_worker_shared_begin, - s_worker_shared_end - s_worker_shared_begin); -} - -void QthreadsExec::resize_worker_scratch(const int reduce_size, - const int shared_size) { - const int exec_all_reduce_alloc = align_alloc(reduce_size); - const int shepherd_scan_alloc = align_alloc(8); - const int shepherd_shared_end = - exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc(shared_size); - - if (s_worker_reduce_end < exec_all_reduce_alloc || - s_worker_shared_end < shepherd_shared_end) { - /* - fprintf( stdout, "QthreadsExec::resize\n"); - fflush(stdout); - */ - - // Clear current worker memory before allocating new worker memory. - clear_workers(); - - // Increase the buffers to an aligned allocation. - s_worker_reduce_end = exec_all_reduce_alloc; - s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc; - s_worker_shared_end = shepherd_shared_end; - - // Need to query which shepherd this main 'process' is running. - - const int main_shep = qthread_shep(); - - // Have each worker resize its memory for proper first-touch. -#if 0 - for ( int jshep = 0; jshep < s_number_shepherds; ++jshep ) { - for ( int i = jshep != main_shep ? 0 : 1; i < s_number_workers_per_shepherd; ++i ) { - qthread_fork_to( driver_resize_worker_scratch, NULL, NULL, jshep ); - } - } -#else - // If this function is used before the 'qthreads.task_policy' unit test, - // the 'qthreads.task_policy' unit test fails with a seg-fault within - // libqthread.so. - for (int jshep = 0; jshep < s_number_shepherds; ++jshep) { - const int num_clone = jshep != main_shep - ? s_number_workers_per_shepherd - : s_number_workers_per_shepherd - 1; - - if (num_clone) { - const int ret = qthread_fork_clones_to_local_priority( - driver_resize_worker_scratch // Function - , - NULL // Function data block - , - NULL // Pointer to return value feb - , - jshep // Shepherd number - , - num_clone - 1 // Number of instances - 1 - ); - - assert(ret == QTHREAD_SUCCESS); - } - } -#endif - - driver_resize_worker_scratch(NULL); - - // Verify all workers allocated. - - bool ok = true; - for (int iwork = 0; ok && iwork < s_number_workers; ++iwork) { - ok = 0 != s_exec[iwork]; - } - - if (!ok) { - std::ostringstream msg; - msg << "Kokkos::Impl::QthreadsExec::resize : FAILED for workers {"; - for (int iwork = 0; iwork < s_number_workers; ++iwork) { - if (0 == s_exec[iwork]) { - msg << " " << (s_number_workers - (iwork + 1)); - } - } - msg << " }"; - Kokkos::Impl::throw_runtime_exception(msg.str()); - } - } -} - -void QthreadsExec::exec_all(Qthreads &, QthreadsExecFunctionPointer func, - const void *arg) { - verify_is_process("QthreadsExec::exec_all(...)", true); - - /* - fprintf( stdout, "QthreadsExec::exec_all\n"); - fflush(stdout); - */ - - s_active_function = func; - s_active_function_arg = arg; - - // Need to query which shepherd this main 'process' is running. - - const int main_shep = qthread_shep(); - -#if 0 - for ( int jshep = 0, iwork = 0; jshep < s_number_shepherds; ++jshep ) { - for ( int i = jshep != main_shep ? 0 : 1; i < s_number_workers_per_shepherd; ++i, ++iwork ) { - qthread_fork_to( driver_exec_all, NULL, NULL, jshep ); - } - } -#else - // If this function is used before the 'qthreads.task_policy' unit test, - // the 'qthreads.task_policy' unit test fails with a seg-fault within - // libqthread.so. - for (int jshep = 0; jshep < s_number_shepherds; ++jshep) { - const int num_clone = jshep != main_shep - ? s_number_workers_per_shepherd - : s_number_workers_per_shepherd - 1; - - if (num_clone) { - const int ret = qthread_fork_clones_to_local_priority( - driver_exec_all // Function - , - NULL // Function data block - , - NULL // Pointer to return value feb - , - jshep // Shepherd number - , - num_clone - 1 // Number of instances - 1 - ); - - assert(ret == QTHREAD_SUCCESS); - } - } -#endif - - driver_exec_all(NULL); - - s_active_function = 0; - s_active_function_arg = 0; -} - -void *QthreadsExec::exec_all_reduce_result() { - return s_exec[0]->m_scratch_alloc; -} - -} // namespace Impl - -} // namespace Kokkos - -namespace Kokkos { - -namespace Impl { - -QthreadsTeamPolicyMember::QthreadsTeamPolicyMember() - : m_exec(**worker_exec()), - m_team_shared(0, 0), - m_team_size(1), - m_team_rank(0), - m_league_size(1), - m_league_end(1), - m_league_rank(0) { - m_exec.shared_reset(m_team_shared); -} - -QthreadsTeamPolicyMember::QthreadsTeamPolicyMember( - const QthreadsTeamPolicyMember::TaskTeam &) - : m_exec(**worker_exec()), - m_team_shared(0, 0), - m_team_size(s_number_workers_per_shepherd), - m_team_rank(m_exec.shepherd_worker_rank()), - m_league_size(1), - m_league_end(1), - m_league_rank(0) { - m_exec.shared_reset(m_team_shared); -} - -} // namespace Impl - -} // namespace Kokkos - -#else -void KOKKOS_SRC_QTHREADS_EXEC_PREVENT_LINK_ERROR() {} -#endif // #if defined( KOKKOS_ENABLE_QTHREADS ) diff --git a/core/src/Qthreads/Kokkos_QthreadsExec.hpp b/core/src/Qthreads/Kokkos_QthreadsExec.hpp deleted file mode 100644 index aa0fd73a4ab..00000000000 --- a/core/src/Qthreads/Kokkos_QthreadsExec.hpp +++ /dev/null @@ -1,687 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_QTHREADSEXEC_HPP -#define KOKKOS_QTHREADSEXEC_HPP - -#include -#if defined(KOKKOS_ENABLE_QTHREADS) - -#include - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -namespace Impl { - -class QthreadsExec; - -typedef void (*QthreadsExecFunctionPointer)(QthreadsExec &, const void *); - -class QthreadsExec { - private: - enum { Inactive = 0, Active = 1 }; - - const QthreadsExec *const *m_worker_base; - const QthreadsExec *const *m_shepherd_base; - - void *m_scratch_alloc; ///< Scratch memory [ reduce, team, shared ] - int m_reduce_end; ///< End of scratch reduction memory - - int m_shepherd_rank; - int m_shepherd_size; - - int m_shepherd_worker_rank; - int m_shepherd_worker_size; - - /* - * m_worker_rank = m_shepherd_rank * m_shepherd_worker_size + - * m_shepherd_worker_rank m_worker_size = m_shepherd_size * - * m_shepherd_worker_size - */ - int m_worker_rank; - int m_worker_size; - - int mutable volatile m_worker_state; - - friend class Kokkos::Qthreads; - - ~QthreadsExec(); - QthreadsExec(const QthreadsExec &); - QthreadsExec &operator=(const QthreadsExec &); - - public: - QthreadsExec(); - - /** Execute the input function on all available Qthreads workers. */ - static void exec_all(Qthreads &, QthreadsExecFunctionPointer, const void *); - - /** Barrier across all workers participating in the 'exec_all'. */ - void exec_all_barrier() const { - const int rev_rank = m_worker_size - (m_worker_rank + 1); - - int n, j; - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < m_worker_size); - n <<= 1) { - Impl::spinwait_while_equal(m_worker_base[j]->m_worker_state, - QthreadsExec::Active); - } - - if (rev_rank) { - m_worker_state = QthreadsExec::Inactive; - Impl::spinwait_while_equal(m_worker_state, QthreadsExec::Inactive); - } - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < m_worker_size); - n <<= 1) { - m_worker_base[j]->m_worker_state = QthreadsExec::Active; - } - } - - /** Barrier across workers within the shepherd with rank < team_rank. */ - void shepherd_barrier(const int team_size) const { - if (m_shepherd_worker_rank < team_size) { - const int rev_rank = team_size - (m_shepherd_worker_rank + 1); - - int n, j; - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < team_size); - n <<= 1) { - Impl::spinwait_while_equal(m_shepherd_base[j]->m_worker_state, - QthreadsExec::Active); - } - - if (rev_rank) { - m_worker_state = QthreadsExec::Inactive; - Impl::spinwait_while_equal(m_worker_state, QthreadsExec::Inactive); - } - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < team_size); - n <<= 1) { - m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; - } - } - } - - /** Reduce across all workers participating in the 'exec_all'. */ - template - inline void exec_all_reduce(const FunctorType &func, - const ReducerType &reduce) const { - typedef Kokkos::Impl::if_c::value, - FunctorType, ReducerType> - ReducerConditional; - typedef typename ReducerConditional::type ReducerTypeFwd; - typedef Kokkos::Impl::FunctorValueJoin ValueJoin; - - const int rev_rank = m_worker_size - (m_worker_rank + 1); - - int n, j; - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < m_worker_size); - n <<= 1) { - const QthreadsExec &fan = *m_worker_base[j]; - - Impl::spinwait_while_equal(fan.m_worker_state, QthreadsExec::Active); - - ValueJoin::join(ReducerConditional::select(func, reduce), m_scratch_alloc, - fan.m_scratch_alloc); - } - - if (rev_rank) { - m_worker_state = QthreadsExec::Inactive; - Impl::spinwait_while_equal(m_worker_state, QthreadsExec::Inactive); - } - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < m_worker_size); - n <<= 1) { - m_worker_base[j]->m_worker_state = QthreadsExec::Active; - } - } - - /** Scan across all workers participating in the 'exec_all'. */ - template - inline void exec_all_scan(const FunctorType &func) const { - typedef Kokkos::Impl::FunctorValueInit ValueInit; - typedef Kokkos::Impl::FunctorValueJoin ValueJoin; - typedef Kokkos::Impl::FunctorValueOps ValueOps; - - const int rev_rank = m_worker_size - (m_worker_rank + 1); - - int n, j; - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < m_worker_size); - n <<= 1) { - Impl::spinwait_while_equal(m_worker_base[j]->m_worker_state, - QthreadsExec::Active); - } - - if (rev_rank) { - m_worker_state = QthreadsExec::Inactive; - Impl::spinwait_while_equal(m_worker_state, QthreadsExec::Inactive); - } else { - // Root thread scans across values before releasing threads. - // Worker data is in reverse order, so m_worker_base[0] is the - // highest ranking thread. - - // Copy from lower ranking to higher ranking worker. - for (int i = 1; i < m_worker_size; ++i) { - ValueOps::copy(func, m_worker_base[i - 1]->m_scratch_alloc, - m_worker_base[i]->m_scratch_alloc); - } - - ValueInit::init(func, m_worker_base[m_worker_size - 1]->m_scratch_alloc); - - // Join from lower ranking to higher ranking worker. - // Value at m_worker_base[n-1] is zero so skip adding it to - // m_worker_base[n-2]. - for (int i = m_worker_size - 1; --i > 0;) { - ValueJoin::join(func, m_worker_base[i - 1]->m_scratch_alloc, - m_worker_base[i]->m_scratch_alloc); - } - } - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < m_worker_size); - n <<= 1) { - m_worker_base[j]->m_worker_state = QthreadsExec::Active; - } - } - - //---------------------------------------- - - template - inline volatile Type *shepherd_team_scratch_value() const { - return (volatile Type *)(((unsigned char *)m_scratch_alloc) + m_reduce_end); - } - - template - inline void shepherd_broadcast(Type &value, const int team_size, - const int team_rank) const { - if (m_shepherd_base) { - Type *const shared_value = - m_shepherd_base[0]->shepherd_team_scratch_value(); - if (m_shepherd_worker_rank == team_rank) { - *shared_value = value; - } - memory_fence(); - shepherd_barrier(team_size); - value = *shared_value; - } - } - - template - inline Type shepherd_reduce(const int team_size, const Type &value) const { - volatile Type *const shared_value = shepherd_team_scratch_value(); - *shared_value = value; - // *shepherd_team_scratch_value() = value; - - memory_fence(); - - const int rev_rank = team_size - (m_shepherd_worker_rank + 1); - - int n, j; - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < team_size); - n <<= 1) { - Impl::spinwait_while_equal(m_shepherd_base[j]->m_worker_state, - QthreadsExec::Active); - } - - if (rev_rank) { - m_worker_state = QthreadsExec::Inactive; - Impl::spinwait_while_equal(m_worker_state, QthreadsExec::Inactive); - } else { - Type &accum = *m_shepherd_base[0]->shepherd_team_scratch_value(); - for (int i = 1; i < n; ++i) { - accum += *m_shepherd_base[i]->shepherd_team_scratch_value(); - } - for (int i = 1; i < n; ++i) { - *m_shepherd_base[i]->shepherd_team_scratch_value() = accum; - } - - memory_fence(); - } - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < team_size); - n <<= 1) { - m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; - } - - return *shepherd_team_scratch_value(); - } - - template - inline typename JoinOp::value_type shepherd_reduce( - const int team_size, const typename JoinOp::value_type &value, - const JoinOp &op) const { - typedef typename JoinOp::value_type Type; - - volatile Type *const shared_value = shepherd_team_scratch_value(); - *shared_value = value; - // *shepherd_team_scratch_value() = value; - - memory_fence(); - - const int rev_rank = team_size - (m_shepherd_worker_rank + 1); - - int n, j; - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < team_size); - n <<= 1) { - Impl::spinwait_while_equal(m_shepherd_base[j]->m_worker_state, - QthreadsExec::Active); - } - - if (rev_rank) { - m_worker_state = QthreadsExec::Inactive; - Impl::spinwait_while_equal(m_worker_state, QthreadsExec::Inactive); - } else { - volatile Type &accum = - *m_shepherd_base[0]->shepherd_team_scratch_value(); - for (int i = 1; i < team_size; ++i) { - op.join(accum, - *m_shepherd_base[i]->shepherd_team_scratch_value()); - } - for (int i = 1; i < team_size; ++i) { - *m_shepherd_base[i]->shepherd_team_scratch_value() = accum; - } - - memory_fence(); - } - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < team_size); - n <<= 1) { - m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; - } - - return *shepherd_team_scratch_value(); - } - - template - inline Type shepherd_scan(const int team_size, const Type &value, - Type *const global_value = 0) const { - *shepherd_team_scratch_value() = value; - - memory_fence(); - - const int rev_rank = team_size - (m_shepherd_worker_rank + 1); - - int n, j; - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < team_size); - n <<= 1) { - Impl::spinwait_while_equal(m_shepherd_base[j]->m_worker_state, - QthreadsExec::Active); - } - - if (rev_rank) { - m_worker_state = QthreadsExec::Inactive; - Impl::spinwait_while_equal(m_worker_state, QthreadsExec::Inactive); - } else { - // Root thread scans across values before releasing threads. - // Worker data is in reverse order, so m_shepherd_base[0] is the - // highest ranking thread. - - // Copy from lower ranking to higher ranking worker. - - Type accum = *m_shepherd_base[0]->shepherd_team_scratch_value(); - for (int i = 1; i < team_size; ++i) { - const Type tmp = - *m_shepherd_base[i]->shepherd_team_scratch_value(); - accum += tmp; - *m_shepherd_base[i - 1]->shepherd_team_scratch_value() = tmp; - } - - *m_shepherd_base[team_size - 1]->shepherd_team_scratch_value() = - global_value ? atomic_fetch_add(global_value, accum) : 0; - - // Join from lower ranking to higher ranking worker. - for (int i = team_size; --i;) { - *m_shepherd_base[i - 1]->shepherd_team_scratch_value() += - *m_shepherd_base[i]->shepherd_team_scratch_value(); - } - - memory_fence(); - } - - for (n = 1; (!(rev_rank & n)) && ((j = rev_rank + n) < team_size); - n <<= 1) { - m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; - } - - return *shepherd_team_scratch_value(); - } - - //---------------------------------------- - - static inline int align_alloc(int size) { - enum { ALLOC_GRAIN = 1 << 6 /* power of two, 64bytes */ }; - enum { ALLOC_GRAIN_MASK = ALLOC_GRAIN - 1 }; - return (size + ALLOC_GRAIN_MASK) & ~ALLOC_GRAIN_MASK; - } - - void shared_reset(Qthreads::scratch_memory_space &); - - void *exec_all_reduce_value() const { return m_scratch_alloc; } - - static void *exec_all_reduce_result(); - - static void resize_worker_scratch(const int reduce_size, - const int shared_size); - static void clear_workers(); - - //---------------------------------------- - - inline int worker_rank() const { return m_worker_rank; } - inline int worker_size() const { return m_worker_size; } - inline int shepherd_worker_rank() const { return m_shepherd_worker_rank; } - inline int shepherd_worker_size() const { return m_shepherd_worker_size; } - inline int shepherd_rank() const { return m_shepherd_rank; } - inline int shepherd_size() const { return m_shepherd_size; } - - static int worker_per_shepherd(); -}; - -} // namespace Impl - -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -namespace Impl { - -class QthreadsTeamPolicyMember { - private: - typedef Kokkos::Qthreads execution_space; - typedef execution_space::scratch_memory_space scratch_memory_space; - - Impl::QthreadsExec &m_exec; - scratch_memory_space m_team_shared; - const int m_team_size; - const int m_team_rank; - const int m_league_size; - const int m_league_end; - int m_league_rank; - - public: - KOKKOS_INLINE_FUNCTION - const scratch_memory_space &team_shmem() const { return m_team_shared; } - - KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank; } - KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size; } - KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank; } - KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size; } - - KOKKOS_INLINE_FUNCTION void team_barrier() const -#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - { - } -#else - { - m_exec.shepherd_barrier(m_team_size); - } -#endif - - template - KOKKOS_INLINE_FUNCTION Type team_broadcast(const Type &value, int rank) const -#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - { - return Type(); - } -#else - { - return m_exec.template shepherd_broadcast(value, m_team_size, rank); - } -#endif - - template - KOKKOS_INLINE_FUNCTION Type team_reduce(const Type &value) const -#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - { - return Type(); - } -#else - { - return m_exec.template shepherd_reduce(m_team_size, value); - } -#endif - - template - KOKKOS_INLINE_FUNCTION typename JoinOp::value_type team_reduce( - const typename JoinOp::value_type &value, const JoinOp &op) const -#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - { - return typename JoinOp::value_type(); - } -#else - { - return m_exec.template shepherd_reduce(m_team_size, value, op); - } -#endif - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering. - * - * The highest rank thread can compute the reduction total as - * reduction_total = dev.team_scan( value ) + value; - */ - template - KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value) const -#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - { - return Type(); - } -#else - { - return m_exec.template shepherd_scan(m_team_size, value); - } -#endif - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering - * with intra-team non-deterministic ordering accumulation. - * - * The global inter-team accumulation value will, at the end of the league's - * parallel execution, be the scan's total. Parallel execution ordering of - * the league's teams is non-deterministic. As such the base value for each - * team's scan operation is similarly non-deterministic. - */ - template - KOKKOS_INLINE_FUNCTION Type team_scan(const Type &value, - Type *const global_accum) const -#if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - { - return Type(); - } -#else - { - return m_exec.template shepherd_scan(m_team_size, value, - global_accum); - } -#endif - - //---------------------------------------- - // Private driver for task-team parallel. - - struct TaskTeam {}; - - QthreadsTeamPolicyMember(); - explicit QthreadsTeamPolicyMember(const TaskTeam &); - - //---------------------------------------- - // Private for the driver ( for ( member_type i( exec, team ); i; - // i.next_team() ) { ... } - - // Initialize. - template - QthreadsTeamPolicyMember( - Impl::QthreadsExec &exec, - const Kokkos::Impl::TeamPolicyInternal &team) - : m_exec(exec), - m_team_shared(0, 0), - m_team_size(team.m_team_size), - m_team_rank(exec.shepherd_worker_rank()), - m_league_size(team.m_league_size), - m_league_end(team.m_league_size - - team.m_shepherd_iter * - (exec.shepherd_size() - (exec.shepherd_rank() + 1))), - m_league_rank(m_league_end > team.m_shepherd_iter - ? m_league_end - team.m_shepherd_iter - : 0) { - m_exec.shared_reset(m_team_shared); - } - - // Continue. - operator bool() const { return m_league_rank < m_league_end; } - - // Iterate. - void next_team() { - ++m_league_rank; - m_exec.shared_reset(m_team_shared); - } -}; - -template -class TeamPolicyInternal - : public PolicyTraits { - private: - const int m_league_size; - const int m_team_size; - const int m_shepherd_iter; - - public: - //! Tag this class as a kokkos execution policy. - typedef TeamPolicyInternal execution_policy; - typedef Qthreads execution_space; - typedef PolicyTraits traits; - - //---------------------------------------- - - template - inline static int team_size_max(const FunctorType &) { - return Qthreads::instance().shepherd_worker_size(); - } - - template - static int team_size_recommended(const FunctorType &f) { - return team_size_max(f); - } - - template - inline static int team_size_recommended(const FunctorType &f, const int &) { - return team_size_max(f); - } - - //---------------------------------------- - - inline int team_size() const { return m_team_size; } - inline int league_size() const { return m_league_size; } - - // One active team per shepherd. - TeamPolicyInternal(Kokkos::Qthreads &q, const int league_size, - const int team_size, const int /* vector_length */ = 0) - : m_league_size(league_size), - m_team_size(team_size < q.shepherd_worker_size() - ? team_size - : q.shepherd_worker_size()), - m_shepherd_iter((league_size + q.shepherd_size() - 1) / - q.shepherd_size()) {} - - // TODO: Make sure this is correct. - // One active team per shepherd. - TeamPolicyInternal(Kokkos::Qthreads &q, const int league_size, - const Kokkos::AUTO_t & /* team_size_request */ - , - const int /* vector_length */ = 0) - : m_league_size(league_size), - m_team_size(q.shepherd_worker_size()), - m_shepherd_iter((league_size + q.shepherd_size() - 1) / - q.shepherd_size()) {} - - // One active team per shepherd. - TeamPolicyInternal(const int league_size, const int team_size, - const int /* vector_length */ = 0) - : m_league_size(league_size), - m_team_size(team_size < Qthreads::instance().shepherd_worker_size() - ? team_size - : Qthreads::instance().shepherd_worker_size()), - m_shepherd_iter( - (league_size + Qthreads::instance().shepherd_size() - 1) / - Qthreads::instance().shepherd_size()) {} - - // TODO: Make sure this is correct. - // One active team per shepherd. - TeamPolicyInternal(const int league_size, - const Kokkos::AUTO_t & /* team_size_request */ - , - const int /* vector_length */ = 0) - : m_league_size(league_size), - m_team_size(Qthreads::instance().shepherd_worker_size()), - m_shepherd_iter( - (league_size + Qthreads::instance().shepherd_size() - 1) / - Qthreads::instance().shepherd_size()) {} - - // TODO: Doesn't do anything yet. Fix this. - /** \brief set chunk_size to a discrete value*/ - inline TeamPolicyInternal set_chunk_size( - typename traits::index_type chunk_size_) const { - TeamPolicyInternal p = *this; - // p.m_chunk_size = chunk_size_; - return p; - } - - typedef Impl::QthreadsTeamPolicyMember member_type; - - friend class Impl::QthreadsTeamPolicyMember; -}; - -} // namespace Impl - -} // namespace Kokkos - -//---------------------------------------------------------------------------- - -#endif -#endif // #define KOKKOS_QTHREADSEXEC_HPP diff --git a/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp b/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp deleted file mode 100644 index 8611818e27c..00000000000 --- a/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp +++ /dev/null @@ -1,753 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_QTHREADS_PARALLEL_HPP -#define KOKKOS_QTHREADS_PARALLEL_HPP - -#include -#if defined(KOKKOS_ENABLE_QTHREADS) - -#include - -#include - -#include - -#include - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -template -class ParallelFor, - Kokkos::Qthreads> { - private: - typedef Kokkos::RangePolicy Policy; - - typedef typename Policy::work_tag WorkTag; - typedef typename Policy::member_type Member; - typedef typename Policy::WorkRange WorkRange; - - const FunctorType m_functor; - const Policy m_policy; - - template - inline static - typename std::enable_if::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend) { - for (Member i = ibeg; i < iend; ++i) { - functor(i); - } - } - - template - inline static - typename std::enable_if::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend) { - const TagType t{}; - for (Member i = ibeg; i < iend; ++i) { - functor(t, i); - } - } - - // Function is called once by every concurrent thread. - static void exec(QthreadsExec& exec, const void* arg) { - const ParallelFor& self = *((const ParallelFor*)arg); - - const WorkRange range(self.m_policy, exec.worker_rank(), - exec.worker_size()); - - ParallelFor::template exec_range(self.m_functor, range.begin(), - range.end()); - - // All threads wait for completion. - exec.exec_all_barrier(); - } - - public: - inline void execute() const { - Impl::QthreadsExec::exec_all(Qthreads::instance(), &ParallelFor::exec, - this); - } - - ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) - : m_functor(arg_functor), m_policy(arg_policy) {} -}; - -//---------------------------------------------------------------------------- - -template -class ParallelReduce, ReducerType, - Kokkos::Qthreads> { - private: - typedef Kokkos::RangePolicy Policy; - - typedef typename Policy::work_tag WorkTag; - typedef typename Policy::WorkRange WorkRange; - typedef typename Policy::member_type Member; - - typedef Kokkos::Impl::if_c::value, - FunctorType, ReducerType> - ReducerConditional; - typedef typename ReducerConditional::type ReducerTypeFwd; - typedef - typename Kokkos::Impl::if_c::value, - WorkTag, void>::type WorkTagFwd; - - // Static Assert WorkTag void if ReducerType not InvalidType - - typedef Kokkos::Impl::FunctorValueTraits - ValueTraits; - typedef Kokkos::Impl::FunctorValueInit ValueInit; - - typedef typename ValueTraits::pointer_type pointer_type; - typedef typename ValueTraits::reference_type reference_type; - - const FunctorType m_functor; - const Policy m_policy; - const ReducerType m_reducer; - const pointer_type m_result_ptr; - - template - inline static - typename std::enable_if::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend, reference_type update) { - for (Member i = ibeg; i < iend; ++i) { - functor(i, update); - } - } - - template - inline static - typename std::enable_if::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend, reference_type update) { - const TagType t{}; - for (Member i = ibeg; i < iend; ++i) { - functor(t, i, update); - } - } - - static void exec(QthreadsExec& exec, const void* arg) { - const ParallelReduce& self = *((const ParallelReduce*)arg); - - const WorkRange range(self.m_policy, exec.worker_rank(), - exec.worker_size()); - - ParallelReduce::template exec_range( - self.m_functor, range.begin(), range.end(), - ValueInit::init( - ReducerConditional::select(self.m_functor, self.m_reducer), - exec.exec_all_reduce_value())); - - exec.template exec_all_reduce( - self.m_functor, self.m_reducer); - } - - public: - inline void execute() const { - QthreadsExec::resize_worker_scratch( - ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)), - 0); - Impl::QthreadsExec::exec_all(Qthreads::instance(), &ParallelReduce::exec, - this); - - const pointer_type data = - (pointer_type)QthreadsExec::exec_all_reduce_result(); - - Kokkos::Impl::FunctorFinal::final( - ReducerConditional::select(m_functor, m_reducer), data); - - if (m_result_ptr) { - const unsigned n = ValueTraits::value_count( - ReducerConditional::select(m_functor, m_reducer)); - for (unsigned i = 0; i < n; ++i) { - m_result_ptr[i] = data[i]; - } - } - } - - template - ParallelReduce( - const FunctorType& arg_functor, const Policy& arg_policy, - const ViewType& arg_result_view, - typename std::enable_if::value && - !Kokkos::is_reducer_type::value, - void*>::type = NULL) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(InvalidType()), - m_result_ptr(arg_result_view.data()) {} - - ParallelReduce(const FunctorType& arg_functor, Policy arg_policy, - const ReducerType& reducer) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(reducer), - m_result_ptr(reducer.result_view().data()) {} -}; - -//---------------------------------------------------------------------------- - -template -class ParallelFor, Kokkos::Qthreads> { - private: - typedef Kokkos::Impl::TeamPolicyInternal - Policy; - typedef typename Policy::member_type Member; - typedef typename Policy::work_tag WorkTag; - - const FunctorType m_functor; - const Policy m_policy; - - template - inline static - typename std::enable_if::value>::type - exec_team(const FunctorType& functor, Member member) { - while (member) { - functor(member); - member.team_barrier(); - member.next_team(); - } - } - - template - inline static - typename std::enable_if::value>::type - exec_team(const FunctorType& functor, Member member) { - const TagType t{}; - while (member) { - functor(t, member); - member.team_barrier(); - member.next_team(); - } - } - - static void exec(QthreadsExec& exec, const void* arg) { - const ParallelFor& self = *((const ParallelFor*)arg); - - ParallelFor::template exec_team(self.m_functor, - Member(exec, self.m_policy)); - - exec.exec_all_barrier(); - } - - public: - inline void execute() const { - QthreadsExec::resize_worker_scratch( - /* reduction memory */ 0, - /* team shared memory */ FunctorTeamShmemSize::value( - m_functor, m_policy.team_size())); - Impl::QthreadsExec::exec_all(Qthreads::instance(), &ParallelFor::exec, - this); - } - - ParallelFor(const FunctorType& arg_functor, const Policy& arg_policy) - : m_functor(arg_functor), m_policy(arg_policy) {} -}; - -//---------------------------------------------------------------------------- - -template -class ParallelReduce, ReducerType, - Kokkos::Qthreads> { - private: - typedef Kokkos::Impl::TeamPolicyInternal - Policy; - - typedef typename Policy::work_tag WorkTag; - typedef typename Policy::member_type Member; - - typedef Kokkos::Impl::if_c::value, - FunctorType, ReducerType> - ReducerConditional; - typedef typename ReducerConditional::type ReducerTypeFwd; - typedef - typename Kokkos::Impl::if_c::value, - WorkTag, void>::type WorkTagFwd; - - typedef Kokkos::Impl::FunctorValueTraits - ValueTraits; - typedef Kokkos::Impl::FunctorValueInit ValueInit; - - typedef typename ValueTraits::pointer_type pointer_type; - typedef typename ValueTraits::reference_type reference_type; - - const FunctorType m_functor; - const Policy m_policy; - const ReducerType m_reducer; - const pointer_type m_result_ptr; - - template - inline static - typename std::enable_if::value>::type - exec_team(const FunctorType& functor, Member member, - reference_type update) { - while (member) { - functor(member, update); - member.team_barrier(); - member.next_team(); - } - } - - template - inline static - typename std::enable_if::value>::type - exec_team(const FunctorType& functor, Member member, - reference_type update) { - const TagType t{}; - while (member) { - functor(t, member, update); - member.team_barrier(); - member.next_team(); - } - } - - static void exec(QthreadsExec& exec, const void* arg) { - const ParallelReduce& self = *((const ParallelReduce*)arg); - - ParallelReduce::template exec_team( - self.m_functor, Member(exec, self.m_policy), - ValueInit::init( - ReducerConditional::select(self.m_functor, self.m_reducer), - exec.exec_all_reduce_value())); - - exec.template exec_all_reduce( - self.m_functor, self.m_reducer); - } - - public: - inline void execute() const { - QthreadsExec::resize_worker_scratch( - /* reduction memory */ ValueTraits::value_size( - ReducerConditional::select(m_functor, m_reducer)), - /* team shared memory */ FunctorTeamShmemSize::value( - m_functor, m_policy.team_size())); - - Impl::QthreadsExec::exec_all(Qthreads::instance(), &ParallelReduce::exec, - this); - - const pointer_type data = - (pointer_type)QthreadsExec::exec_all_reduce_result(); - - Kokkos::Impl::FunctorFinal::final( - ReducerConditional::select(m_functor, m_reducer), data); - - if (m_result_ptr) { - const unsigned n = ValueTraits::value_count( - ReducerConditional::select(m_functor, m_reducer)); - for (unsigned i = 0; i < n; ++i) { - m_result_ptr[i] = data[i]; - } - } - } - - template - ParallelReduce( - const FunctorType& arg_functor, const Policy& arg_policy, - const ViewType& arg_result, - typename std::enable_if::value && - !Kokkos::is_reducer_type::value, - void*>::type = NULL) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(InvalidType()), - m_result_ptr(arg_result.ptr_on_device()) {} - - inline ParallelReduce(const FunctorType& arg_functor, Policy arg_policy, - const ReducerType& reducer) - : m_functor(arg_functor), - m_policy(arg_policy), - m_reducer(reducer), - m_result_ptr(reducer.result_view().data()) {} -}; - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -template -class ParallelScan, - Kokkos::Qthreads> { - private: - typedef Kokkos::RangePolicy Policy; - - typedef typename Policy::work_tag WorkTag; - typedef typename Policy::WorkRange WorkRange; - typedef typename Policy::member_type Member; - - typedef Kokkos::Impl::FunctorValueTraits ValueTraits; - typedef Kokkos::Impl::FunctorValueInit ValueInit; - - typedef typename ValueTraits::pointer_type pointer_type; - typedef typename ValueTraits::reference_type reference_type; - - const FunctorType m_functor; - const Policy m_policy; - - template - inline static - typename std::enable_if::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend, reference_type update, const bool final) { - for (Member i = ibeg; i < iend; ++i) { - functor(i, update, final); - } - } - - template - inline static - typename std::enable_if::value>::type - exec_range(const FunctorType& functor, const Member ibeg, - const Member iend, reference_type update, const bool final) { - const TagType t{}; - for (Member i = ibeg; i < iend; ++i) { - functor(t, i, update, final); - } - } - - static void exec(QthreadsExec& exec, const void* arg) { - const ParallelScan& self = *((const ParallelScan*)arg); - - const WorkRange range(self.m_policy, exec.worker_rank(), - exec.worker_size()); - - // Initialize thread-local value - reference_type update = - ValueInit::init(self.m_functor, exec.exec_all_reduce_value()); - - ParallelScan::template exec_range(self.m_functor, range.begin(), - range.end(), update, false); - - exec.template exec_all_scan( - self.m_functor); - - ParallelScan::template exec_range(self.m_functor, range.begin(), - range.end(), update, true); - - exec.exec_all_barrier(); - } - - public: - inline void execute() const { - QthreadsExec::resize_worker_scratch(ValueTraits::value_size(m_functor), 0); - Impl::QthreadsExec::exec_all(Qthreads::instance(), &ParallelScan::exec, - this); - } - - ParallelScan(const FunctorType& arg_functor, const Policy& arg_policy) - : m_functor(arg_functor), m_policy(arg_policy) {} -}; - -} // namespace Impl - -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template -KOKKOS_INLINE_FUNCTION - Impl::TeamThreadRangeBoundariesStruct - TeamThreadRange(const Impl::QthreadsTeamPolicyMember& thread, - const iType& count) { - return Impl::TeamThreadRangeBoundariesStruct( - thread, count); -} - -template -KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< - typename std::common_type::type, - Impl::QthreadsTeamPolicyMember> -TeamThreadRange(const Impl::QthreadsTeamPolicyMember& thread, - const iType1& begin, const iType2& end) { - typedef typename std::common_type::type iType; - return Impl::TeamThreadRangeBoundariesStruct( - thread, iType(begin), iType(end)); -} - -template -KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::QthreadsTeamPolicyMember> -ThreadVectorRange(const Impl::QthreadsTeamPolicyMember& thread, - const iType& count) { - return Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::QthreadsTeamPolicyMember>(thread, count); -} - -template -KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::QthreadsTeamPolicyMember> -ThreadVectorRange(const Impl::QthreadsTeamPolicyMember& thread, - const iType& arg_begin, const iType& arg_end) { - return Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::QthreadsTeamPolicyMember>(thread, arg_begin, arg_end); -} - -KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct PerTeam( - const Impl::QthreadsTeamPolicyMember& thread) { - return Impl::ThreadSingleStruct(thread); -} - -KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct PerThread( - const Impl::QthreadsTeamPolicyMember& thread) { - return Impl::VectorSingleStruct(thread); -} - -/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each - * i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team. - * This functionality requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION void parallel_for( - const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::QthreadsTeamPolicyMember>& loop_boundaries, - const Lambda& lambda) { - for (iType i = loop_boundaries.start; i < loop_boundaries.end; - i += loop_boundaries.increment) - lambda(i); -} - -/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, - * ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team - * and a summation of val is performed and put into result. This functionality - * requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION void parallel_reduce( - const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::QthreadsTeamPolicyMember>& loop_boundaries, - const Lambda& lambda, ValueType& result) { - result = ValueType(); - - for (iType i = loop_boundaries.start; i < loop_boundaries.end; - i += loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i, tmp); - result += tmp; - } - - result = - loop_boundaries.thread.team_reduce(result, Impl::JoinAdd()); -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, - * ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread - * and a reduction of val is performed using JoinType(ValueType& val, const - * ValueType& update) and put into init_result. The input value of init_result - * is used as initializer for temporary variables of ValueType. Therefore the - * input value should be the neutral element with respect to the join operation - * (e.g. '0 for +-' or '1 for *'). This functionality requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION void parallel_reduce( - const Impl::TeamThreadRangeBoundariesStruct< - iType, Impl::QthreadsTeamPolicyMember>& loop_boundaries, - const Lambda& lambda, const JoinType& join, ValueType& init_result) { - ValueType result = init_result; - - for (iType i = loop_boundaries.start; i < loop_boundaries.end; - i += loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i, tmp); - join(result, tmp); - } - - init_result = loop_boundaries.thread.team_reduce( - result, Impl::JoinLambdaAdapter(join)); -} - -/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each - * i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. - * This functionality requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION void parallel_for( - const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::QthreadsTeamPolicyMember>& loop_boundaries, - const Lambda& lambda) { -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for (iType i = loop_boundaries.start; i < loop_boundaries.end; - i += loop_boundaries.increment) - lambda(i); -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, - * ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread - * and a summation of val is performed and put into result. This functionality - * requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION void parallel_reduce( - const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::QthreadsTeamPolicyMember>& loop_boundaries, - const Lambda& lambda, ValueType& result) { - result = ValueType(); -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for (iType i = loop_boundaries.start; i < loop_boundaries.end; - i += loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i, tmp); - result += tmp; - } -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, - * ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread - * and a reduction of val is performed using JoinType(ValueType& val, const - * ValueType& update) and put into init_result. The input value of init_result - * is used as initializer for temporary variables of ValueType. Therefore the - * input value should be the neutral element with respect to the join operation - * (e.g. '0 for +-' or '1 for *'). This functionality requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION void parallel_reduce( - const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::QthreadsTeamPolicyMember>& loop_boundaries, - const Lambda& lambda, const JoinType& join, ValueType& init_result) { - ValueType result = init_result; -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for (iType i = loop_boundaries.start; i < loop_boundaries.end; - i += loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i, tmp); - join(result, tmp); - } - init_result = result; -} - -/** \brief Intra-thread vector parallel exclusive prefix sum. Executes - * lambda(iType i, ValueType & val, bool final) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan - * operation is performed. Depending on the target execution space the operator - * might be called twice: once with final=false and once with final=true. When - * final==true val contains the prefix sum value. The contribution of this "i" - * needs to be added to val no matter whether final==true or not. In a serial - * execution (i.e. team_size==1) the operator is only called once with - * final==true. Scan_val will be set to the final sum value over all vector - * lanes. This functionality requires C++11 support.*/ -template -KOKKOS_INLINE_FUNCTION void parallel_scan( - const Impl::ThreadVectorRangeBoundariesStruct< - iType, Impl::QthreadsTeamPolicyMember>& loop_boundaries, - const FunctorType& lambda) { - typedef Kokkos::Impl::FunctorValueTraits ValueTraits; - typedef typename ValueTraits::value_type value_type; - - value_type scan_val = value_type(); - -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for (iType i = loop_boundaries.start; i < loop_boundaries.end; - i += loop_boundaries.increment) { - lambda(i, scan_val, true); - } -} - -template -KOKKOS_INLINE_FUNCTION void single( - const Impl::VectorSingleStruct& - single_struct, - const FunctorType& lambda) { - lambda(); -} - -template -KOKKOS_INLINE_FUNCTION void single( - const Impl::ThreadSingleStruct& - single_struct, - const FunctorType& lambda) { - if (single_struct.team_member.team_rank() == 0) lambda(); -} - -template -KOKKOS_INLINE_FUNCTION void single( - const Impl::VectorSingleStruct& - single_struct, - const FunctorType& lambda, ValueType& val) { - lambda(val); -} - -template -KOKKOS_INLINE_FUNCTION void single( - const Impl::ThreadSingleStruct& - single_struct, - const FunctorType& lambda, ValueType& val) { - if (single_struct.team_member.team_rank() == 0) { - lambda(val); - } - single_struct.team_member.team_broadcast(val, 0); -} - -} // namespace Kokkos - -#endif -#endif /* #define KOKKOS_QTHREADS_PARALLEL_HPP */ diff --git a/core/src/Qthreads/Kokkos_Qthreads_Task.cpp b/core/src/Qthreads/Kokkos_Qthreads_Task.cpp deleted file mode 100644 index b3a903494a6..00000000000 --- a/core/src/Qthreads/Kokkos_Qthreads_Task.cpp +++ /dev/null @@ -1,308 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#if defined(KOKKOS_ENABLE_QTHREADS) && defined(KOKKOS_ENABLE_TASKPOLICY) - -#include -#include - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template class TaskQueue; - -//---------------------------------------------------------------------------- - -TaskExec::TaskExec() - : m_self_exec(0), - m_team_exec(0), - m_sync_mask(0), - m_sync_value(0), - m_sync_step(0), - m_group_rank(0), - m_team_rank(0), - m_team_size(1) {} - -TaskExec::TaskExec(Kokkos::Impl::QthreadsExec &arg_exec, - int const arg_team_size) - : m_self_exec(&arg_exec), - m_team_exec(arg_exec.pool_rev(arg_exec.pool_rank_rev() / arg_team_size)), - m_sync_mask(0), - m_sync_value(0), - m_sync_step(0), - m_group_rank(arg_exec.pool_rank_rev() / arg_team_size), - m_team_rank(arg_exec.pool_rank_rev() % arg_team_size), - m_team_size(arg_team_size) { - // This team spans - // m_self_exec->pool_rev( team_size * group_rank ) - // m_self_exec->pool_rev( team_size * ( group_rank + 1 ) - 1 ) - - int64_t volatile *const sync = (int64_t *)m_self_exec->scratch_reduce(); - - sync[0] = int64_t(0); - sync[1] = int64_t(0); - - for (int i = 0; i < m_team_size; ++i) { - m_sync_value |= int64_t(1) << (8 * i); - m_sync_mask |= int64_t(3) << (8 * i); - } - - Kokkos::memory_fence(); -} - -#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - -void TaskExec::team_barrier() const { - if (1 < m_team_size) { - if (m_team_exec->scratch_reduce_size() < int(2 * sizeof(int64_t))) { - Kokkos::abort("TaskQueue scratch_reduce memory too small"); - } - - // Use team shared memory to synchronize. - // Alternate memory locations between barriers to avoid a sequence - // of barriers overtaking one another. - - int64_t volatile *const sync = - ((int64_t *)m_team_exec->scratch_reduce()) + (m_sync_step & 0x01); - - // This team member sets one byte within the sync variable - int8_t volatile *const sync_self = ((int8_t *)sync) + m_team_rank; - -#if 0 -fprintf( stdout, - "barrier group(%d) member(%d) step(%d) wait(%lx) : before(%lx)\n", - m_group_rank, - m_team_rank, - m_sync_step, - m_sync_value, - *sync - ); -fflush(stdout); -#endif - - *sync_self = int8_t(m_sync_value & 0x03); // signal arrival - - while (m_sync_value != *sync) - ; // wait for team to arrive - -#if 0 -fprintf( stdout, - "barrier group(%d) member(%d) step(%d) wait(%lx) : after(%lx)\n", - m_group_rank, - m_team_rank, - m_sync_step, - m_sync_value, - *sync - ); -fflush(stdout); -#endif - - ++m_sync_step; - - if (0 == (0x01 & m_sync_step)) { // Every other step - m_sync_value ^= m_sync_mask; - if (1000 < m_sync_step) m_sync_step = 0; - } - } -} - -#endif - -//---------------------------------------------------------------------------- - -void TaskQueueSpecialization::execute( - TaskQueue *const queue) { - using execution_space = Kokkos::Qthreads; - using queue_type = TaskQueue; - using task_root_type = TaskBase; - using PoolExec = Kokkos::Impl::QthreadsExec; - using Member = TaskExec; - - task_root_type *const end = (task_root_type *)task_root_type::EndTag; - - // Required: team_size <= 8 - - const int team_size = PoolExec::pool_size(2); // Threads per core - // const int team_size = PoolExec::pool_size(1); // Threads per NUMA - - if (8 < team_size) { - Kokkos::abort("TaskQueue unsupported team size"); - } - -#pragma omp parallel - { - PoolExec &self = *PoolExec::get_thread_omp(); - - Member single_exec; - Member team_exec(self, team_size); - - // Team shared memory - task_root_type *volatile *const task_shared = - (task_root_type **)team_exec.m_team_exec->scratch_thread(); - -// Barrier across entire Qthreads thread pool to insure initialization -#pragma omp barrier - - // Loop until all queues are empty and no tasks in flight - - do { - // Each team lead attempts to acquire either a thread team task - // or collection of single thread tasks for the team. - - if (0 == team_exec.team_rank()) { - task_root_type *tmp = - 0 < *((volatile int *)&queue->m_ready_count) ? end : 0; - - // Loop by priority and then type - for (int i = 0; i < queue_type::NumQueue && end == tmp; ++i) { - for (int j = 0; j < 2 && end == tmp; ++j) { - tmp = queue_type::pop_task(&queue->m_ready[i][j]); - } - } - - *task_shared = tmp; - - // Fence to be sure shared_task_array is stored - Kokkos::memory_fence(); - } - - // Whole team waits for every team member to reach this statement - team_exec.team_barrier(); - - Kokkos::memory_fence(); - - task_root_type *const task = *task_shared; - -#if 0 -fprintf( stdout, - "\nexecute group(%d) member(%d) task_shared(0x%lx) task(0x%lx)\n", - team_exec.m_group_rank, - team_exec.m_team_rank, - uintptr_t(task_shared), - uintptr_t(task) - ); -fflush(stdout); -#endif - - if (0 == task) break; // 0 == m_ready_count - - if (end == task) { - team_exec.team_barrier(); - } else if (task_root_type::TaskTeam == task->m_task_type) { - // Thread Team Task - (*task->m_apply)(task, &team_exec); - - // The m_apply function performs a barrier - - if (0 == team_exec.team_rank()) { - // team member #0 completes the task, which may delete the task - queue->complete(task); - } - } else { - // Single Thread Task - - if (0 == team_exec.team_rank()) { - (*task->m_apply)(task, &single_exec); - - queue->complete(task); - } - - // All team members wait for whole team to reach this statement. - // Not necessary to complete the task. - // Is necessary to prevent task_shared from being updated - // before it is read by all threads. - team_exec.team_barrier(); - } - } while (1); - } - // END #pragma omp parallel -} - -void TaskQueueSpecialization:: - iff_single_thread_recursive_execute( - TaskQueue *const queue) { - using execution_space = Kokkos::Qthreads; - using queue_type = TaskQueue; - using task_root_type = TaskBase; - using Member = TaskExec; - - if (1 == omp_get_num_threads()) { - task_root_type *const end = (task_root_type *)task_root_type::EndTag; - - Member single_exec; - - task_root_type *task = end; - - do { - task = end; - - // Loop by priority and then type - for (int i = 0; i < queue_type::NumQueue && end == task; ++i) { - for (int j = 0; j < 2 && end == task; ++j) { - task = queue_type::pop_task(&queue->m_ready[i][j]); - } - } - - if (end == task) break; - - (*task->m_apply)(task, &single_exec); - - queue->complete(task); - - } while (1); - } -} - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -#else -void KOKKOS_SRC_QTHREADS_TASK_PREVENT_LINK_ERROR() {} -#endif /* #if defined( KOKKOS_ENABLE_QTHREADS ) && defined( \ - KOKKOS_ENABLE_TASKPOLICY ) */ diff --git a/core/src/Qthreads/Kokkos_Qthreads_Task.hpp b/core/src/Qthreads/Kokkos_Qthreads_Task.hpp deleted file mode 100644 index 1b2c3d3855e..00000000000 --- a/core/src/Qthreads/Kokkos_Qthreads_Task.hpp +++ /dev/null @@ -1,149 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_IMPL_QTHREADS_TASK_HPP -#define KOKKOS_IMPL_QTHREADS_TASK_HPP - -#include -#if defined(KOKKOS_ENABLE_QTHREADS) && defined(KOKKOS_ENABLE_TASKPOLICY) - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template <> -class TaskQueueSpecialization { - public: - using execution_space = Kokkos::Qthreads; - using queue_type = Kokkos::Impl::TaskQueue; - using task_base_type = Kokkos::Impl::TaskBase; - - // Must specify memory space - using memory_space = Kokkos::HostSpace; - - static void iff_single_thread_recursive_execute(queue_type* const); - - // Must provide task queue execution function - static void execute(queue_type* const); - - // Must provide mechanism to set function pointer in - // execution space from the host process. - template - static void proc_set_apply(task_base_type::function_type* ptr) { - using TaskType = TaskBase; - *ptr = TaskType::apply; - } -}; - -extern template class TaskQueue; - -//---------------------------------------------------------------------------- - -template <> -class TaskExec { - private: - TaskExec(TaskExec&&) = delete; - TaskExec(TaskExec const&) = delete; - TaskExec& operator=(TaskExec&&) = delete; - TaskExec& operator=(TaskExec const&) = delete; - - using PoolExec = Kokkos::Impl::QthreadsExec; - - friend class Kokkos::Impl::TaskQueue; - friend class Kokkos::Impl::TaskQueueSpecialization; - - PoolExec* const m_self_exec; ///< This thread's thread pool data structure - PoolExec* const m_team_exec; ///< Team thread's thread pool data structure - int64_t m_sync_mask; - int64_t mutable m_sync_value; - int mutable m_sync_step; - int m_group_rank; ///< Which "team" subset of thread pool - int m_team_rank; ///< Which thread within a team - int m_team_size; - - TaskExec(); - TaskExec(PoolExec& arg_exec, int arg_team_size); - - public: -#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - void* team_shared() const { - return m_team_exec ? m_team_exec->scratch_thread() : (void*)0; - } - - int team_shared_size() const { - return m_team_exec ? m_team_exec->scratch_thread_size() : 0; - } - - /**\brief Whole team enters this function call - * before any teeam member returns from - * this function call. - */ - void team_barrier() const; -#else - KOKKOS_INLINE_FUNCTION void team_barrier() const {} - KOKKOS_INLINE_FUNCTION void* team_shared() const { return 0; } - KOKKOS_INLINE_FUNCTION int team_shared_size() const { return 0; } -#endif - - KOKKOS_INLINE_FUNCTION - int team_rank() const { return m_team_rank; } - - KOKKOS_INLINE_FUNCTION - int team_size() const { return m_team_size; } -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ -#endif /* #ifndef KOKKOS_IMPL_QTHREADS_TASK_HPP */ diff --git a/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old b/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old deleted file mode 100644 index a59afb2881e..00000000000 --- a/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old +++ /dev/null @@ -1,493 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -// Experimental unified task-data parallel manycore LDRD. - - -#include -#if defined( KOKKOS_ENABLE_QTHREADS ) - -#include - -#include -#include - -#include -#include -#include -#include - -#include -#include - -#if defined( KOKKOS_ENABLE_TASKDAG ) - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -typedef TaskMember< Kokkos::Qthreads , void , void > Task ; - -namespace { - -inline -unsigned padded_sizeof_derived( unsigned sizeof_derived ) -{ - return sizeof_derived + - ( sizeof_derived % sizeof(Task*) ? sizeof(Task*) - sizeof_derived % sizeof(Task*) : 0 ); -} - -// int lock_alloc_dealloc = 0 ; - -} // namespace - -void Task::deallocate( void * ptr ) -{ - // Counting on 'free' thread safety so lock/unlock not required. - // However, isolate calls here to mitigate future need to introduce lock/unlock. - - // lock - - // while ( ! Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 0 , 1 ) ); - - free( ptr ); - - // unlock - - // Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 1 , 0 ); -} - -void * Task::allocate( const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity ) -{ - // Counting on 'malloc' thread safety so lock/unlock not required. - // However, isolate calls here to mitigate future need to introduce lock/unlock. - - // lock - - // while ( ! Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 0 , 1 ) ); - - void * const ptr = malloc( padded_sizeof_derived( arg_sizeof_derived ) + arg_dependence_capacity * sizeof(Task*) ); - - // unlock - - // Kokkos::atomic_compare_exchange_strong( & lock_alloc_dealloc , 1 , 0 ); - - return ptr ; -} - -Task::~TaskMember() -{ - -} - - -Task::TaskMember( const function_verify_type arg_verify - , const function_dealloc_type arg_dealloc - , const function_single_type arg_apply_single - , const function_team_type arg_apply_team - , volatile int & arg_active_count - , const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity - ) - : m_dealloc( arg_dealloc ) - , m_verify( arg_verify ) - , m_apply_single( arg_apply_single ) - , m_apply_team( arg_apply_team ) - , m_active_count( & arg_active_count ) - , m_qfeb(0) - , m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) ) - , m_dep_capacity( arg_dependence_capacity ) - , m_dep_size( 0 ) - , m_ref_count( 0 ) - , m_state( Kokkos::Experimental::TASK_STATE_CONSTRUCTING ) -{ - qthread_empty( & m_qfeb ); // Set to full when complete - for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ; -} - -Task::TaskMember( const function_dealloc_type arg_dealloc - , const function_single_type arg_apply_single - , const function_team_type arg_apply_team - , volatile int & arg_active_count - , const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity - ) - : m_dealloc( arg_dealloc ) - , m_verify( & Task::verify_type ) - , m_apply_single( arg_apply_single ) - , m_apply_team( arg_apply_team ) - , m_active_count( & arg_active_count ) - , m_qfeb(0) - , m_dep( (Task **)( ((unsigned char *) this) + padded_sizeof_derived( arg_sizeof_derived ) ) ) - , m_dep_capacity( arg_dependence_capacity ) - , m_dep_size( 0 ) - , m_ref_count( 0 ) - , m_state( Kokkos::Experimental::TASK_STATE_CONSTRUCTING ) -{ - qthread_empty( & m_qfeb ); // Set to full when complete - for ( unsigned i = 0 ; i < arg_dependence_capacity ; ++i ) m_dep[i] = 0 ; -} - -//---------------------------------------------------------------------------- - -void Task::throw_error_add_dependence() const -{ - std::cerr << "TaskMember< Qthreads >::add_dependence ERROR" - << " state(" << m_state << ")" - << " dep_size(" << m_dep_size << ")" - << std::endl ; - throw std::runtime_error("TaskMember< Qthreads >::add_dependence ERROR"); -} - -void Task::throw_error_verify_type() -{ - throw std::runtime_error("TaskMember< Qthreads >::verify_type ERROR"); -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) -void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw ) -{ - static const char msg_error_header[] = "Kokkos::Impl::TaskManager::assign ERROR" ; - static const char msg_error_count[] = ": negative reference count" ; - static const char msg_error_complete[] = ": destroy task that is not complete" ; - static const char msg_error_dependences[] = ": destroy task that has dependences" ; - static const char msg_error_exception[] = ": caught internal exception" ; - - if ( rhs ) { Kokkos::atomic_increment( &(*rhs).m_ref_count ); } - - Task * const lhs_val = Kokkos::atomic_exchange( lhs , rhs ); - - if ( lhs_val ) { - - const int count = Kokkos::atomic_fetch_add( & (*lhs_val).m_ref_count , -1 ); - - const char * msg_error = 0 ; - - try { - - if ( 1 == count ) { - - // Reference count at zero, delete it - - // Should only be deallocating a completed task - if ( (*lhs_val).m_state == Kokkos::Experimental::TASK_STATE_COMPLETE ) { - - // A completed task should not have dependences... - for ( int i = 0 ; i < (*lhs_val).m_dep_size && 0 == msg_error ; ++i ) { - if ( (*lhs_val).m_dep[i] ) msg_error = msg_error_dependences ; - } - } - else { - msg_error = msg_error_complete ; - } - - if ( 0 == msg_error ) { - // Get deletion function and apply it - const Task::function_dealloc_type d = (*lhs_val).m_dealloc ; - - (*d)( lhs_val ); - } - } - else if ( count <= 0 ) { - msg_error = msg_error_count ; - } - } - catch( ... ) { - if ( 0 == msg_error ) msg_error = msg_error_exception ; - } - - if ( 0 != msg_error ) { - if ( no_throw ) { - std::cerr << msg_error_header << msg_error << std::endl ; - std::cerr.flush(); - } - else { - std::string msg(msg_error_header); - msg.append(msg_error); - throw std::runtime_error( msg ); - } - } - } -} -#endif - - -//---------------------------------------------------------------------------- - -void Task::closeout() -{ - enum { RESPAWN = int( Kokkos::Experimental::TASK_STATE_WAITING ) | - int( Kokkos::Experimental::TASK_STATE_EXECUTING ) }; - -#if 0 -fprintf( stdout - , "worker(%d.%d) task 0x%.12lx %s\n" - , qthread_shep() - , qthread_worker_local(NULL) - , reinterpret_cast(this) - , ( m_state == RESPAWN ? "respawn" : "complete" ) - ); -fflush(stdout); -#endif - - // When dependent tasks run there would be a race - // condition between destroying this task and - // querying the active count pointer from this task. - int volatile * const active_count = m_active_count ; - - if ( m_state == RESPAWN ) { - // Task requests respawn, set state to waiting and reschedule the task - m_state = Kokkos::Experimental::TASK_STATE_WAITING ; - schedule(); - } - else { - - // Task did not respawn, is complete - m_state = Kokkos::Experimental::TASK_STATE_COMPLETE ; - - // Release dependences before allowing dependent tasks to run. - // Otherwise there is a thread race condition for removing dependences. - for ( int i = 0 ; i < m_dep_size ; ++i ) { - assign( & m_dep[i] , 0 ); - } - - // Set Qthreads FEB to full so that dependent tasks are allowed to execute. - // This 'task' may be deleted immediately following this function call. - qthread_fill( & m_qfeb ); - - // The dependent task could now complete and destroy 'this' task - // before the call to 'qthread_fill' returns. Therefore, for - // thread safety assume that 'this' task has now been destroyed. - } - - // Decrement active task count before returning. - Kokkos::atomic_decrement( active_count ); -} - -aligned_t Task::qthread_func( void * arg ) -{ - Task * const task = reinterpret_cast< Task * >(arg); - - // First member of the team change state to executing. - // Use compare-exchange to avoid race condition with a respawn. - Kokkos::atomic_compare_exchange_strong( & task->m_state - , int(Kokkos::Experimental::TASK_STATE_WAITING) - , int(Kokkos::Experimental::TASK_STATE_EXECUTING) - ); - - if ( task->m_apply_team && ! task->m_apply_single ) { - Kokkos::Impl::QthreadsTeamPolicyMember::TaskTeam task_team_tag ; - - // Initialize team size and rank with shephered info - Kokkos::Impl::QthreadsTeamPolicyMember member( task_team_tag ); - - (*task->m_apply_team)( task , member ); - -#if 0 -fprintf( stdout - , "worker(%d.%d) task 0x%.12lx executed by member(%d:%d)\n" - , qthread_shep() - , qthread_worker_local(NULL) - , reinterpret_cast(task) - , member.team_rank() - , member.team_size() - ); -fflush(stdout); -#endif - - member.team_barrier(); - if ( member.team_rank() == 0 ) task->closeout(); - member.team_barrier(); - } - else if ( task->m_apply_team && task->m_apply_single == reinterpret_cast(1) ) { - // Team hard-wired to one, no cloning - Kokkos::Impl::QthreadsTeamPolicyMember member ; - (*task->m_apply_team)( task , member ); - task->closeout(); - } - else { - (*task->m_apply_single)( task ); - task->closeout(); - } - -#if 0 -fprintf( stdout - , "worker(%d.%d) task 0x%.12lx return\n" - , qthread_shep() - , qthread_worker_local(NULL) - , reinterpret_cast(task) - ); -fflush(stdout); -#endif - - return 0 ; -} - -void Task::respawn() -{ - // Change state from pure executing to ( waiting | executing ) - // to avoid confusion with simply waiting. - Kokkos::atomic_compare_exchange_strong( & m_state - , int(Kokkos::Experimental::TASK_STATE_EXECUTING) - , int(Kokkos::Experimental::TASK_STATE_WAITING | - Kokkos::Experimental::TASK_STATE_EXECUTING) - ); -} - -void Task::schedule() -{ - // Is waiting for execution - - // Increment active task count before spawning. - Kokkos::atomic_increment( m_active_count ); - - // spawn in Qthreads. must malloc the precondition array and give to Qthreads. - // Qthreads will eventually free this allocation so memory will not be leaked. - - // concern with thread safety of malloc, does this need to be guarded? - aligned_t ** qprecon = (aligned_t **) malloc( ( m_dep_size + 1 ) * sizeof(aligned_t *) ); - - qprecon[0] = reinterpret_cast( uintptr_t(m_dep_size) ); - - for ( int i = 0 ; i < m_dep_size ; ++i ) { - qprecon[i+1] = & m_dep[i]->m_qfeb ; // Qthreads precondition flag - } - - if ( m_apply_team && ! m_apply_single ) { - // If more than one shepherd spawn on a shepherd other than this shepherd - const int num_shepherd = qthread_num_shepherds(); - const int num_worker_per_shepherd = qthread_num_workers_local(NO_SHEPHERD); - const int this_shepherd = qthread_shep(); - - int spawn_shepherd = ( this_shepherd + 1 ) % num_shepherd ; - -#if 0 -fprintf( stdout - , "worker(%d.%d) task 0x%.12lx spawning on shepherd(%d) clone(%d)\n" - , qthread_shep() - , qthread_worker_local(NULL) - , reinterpret_cast(this) - , spawn_shepherd - , num_worker_per_shepherd - 1 - ); -fflush(stdout); -#endif - - qthread_spawn_cloneable - ( & Task::qthread_func - , this - , 0 - , NULL - , m_dep_size , qprecon /* dependences */ - , spawn_shepherd - , unsigned( QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY ) - , num_worker_per_shepherd - 1 - ); - } - else { - qthread_spawn( & Task::qthread_func /* function */ - , this /* function argument */ - , 0 - , NULL - , m_dep_size , qprecon /* dependences */ - , NO_SHEPHERD - , QTHREAD_SPAWN_SIMPLE /* allows optimization for non-blocking task */ - ); - } -} - -} // namespace Impl -} // namespace Experimental -} // namespace Kokkos - -namespace Kokkos { -namespace Experimental { - -TaskPolicy< Kokkos::Qthreads >:: -TaskPolicy - ( const unsigned /* arg_task_max_count */ - , const unsigned /* arg_task_max_size */ - , const unsigned arg_task_default_dependence_capacity - , const unsigned arg_task_team_size - ) - : m_default_dependence_capacity( arg_task_default_dependence_capacity ) - , m_team_size( arg_task_team_size != 0 ? arg_task_team_size : unsigned(qthread_num_workers_local(NO_SHEPHERD)) ) - , m_active_count_root(0) - , m_active_count( m_active_count_root ) -{ - const unsigned num_worker_per_shepherd = unsigned( qthread_num_workers_local(NO_SHEPHERD) ); - - if ( m_team_size != 1 && m_team_size != num_worker_per_shepherd ) { - std::ostringstream msg ; - msg << "Kokkos::Experimental::TaskPolicy< Kokkos::Qthreads >( " - << "default_depedence = " << arg_task_default_dependence_capacity - << " , team_size = " << arg_task_team_size - << " ) ERROR, valid team_size arguments are { (omitted) , 1 , " << num_worker_per_shepherd << " }" ; - Kokkos::Impl::throw_runtime_exception(msg.str()); - } -} - -TaskPolicy< Kokkos::Qthreads >::member_type & -TaskPolicy< Kokkos::Qthreads >::member_single() -{ - static member_type s ; - return s ; -} - -void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthreads > & policy ) -{ - volatile int * const active_task_count = & policy.m_active_count ; - while ( *active_task_count ) qthread_yield(); -} - -} // namespace Experimental -} // namespace Kokkos - -#else -void KOKKOS_CORE_SRC_QTHREADS_KOKKOS_QTHREADS_TASKPOLICY_PREVENT_LINK_ERROR() {} -#endif // #if defined( KOKKOS_ENABLE_TASKDAG ) -#endif // #if defined( KOKKOS_ENABLE_QTHREADS ) - diff --git a/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old b/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old deleted file mode 100644 index adb6859763d..00000000000 --- a/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old +++ /dev/null @@ -1,666 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -// Experimental unified task-data parallel manycore LDRD - -#ifndef KOKKOS_QTHREADS_TASKSCHEDULER_HPP -#define KOKKOS_QTHREADS_TASKSCHEDULER_HPP - -#include -#if defined( KOKKOS_ENABLE_TASKDAG ) - -#include -#include -#include - -//---------------------------------------------------------------------------- -// Defines to enable experimental Qthreads functionality - -#define QTHREAD_LOCAL_PRIORITY -#define CLONED_TASKS - -#include - -#undef QTHREAD_LOCAL_PRIORITY -#undef CLONED_TASKS - -//---------------------------------------------------------------------------- - -#include -#include -#include - -#include - - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { -namespace Impl { - -template<> -class TaskMember< Kokkos::Qthreads , void , void > -{ -public: - - typedef TaskMember * (* function_verify_type) ( TaskMember * ); - typedef void (* function_single_type) ( TaskMember * ); - typedef void (* function_team_type) ( TaskMember * , Kokkos::Impl::QthreadsTeamPolicyMember & ); - typedef void (* function_dealloc_type)( TaskMember * ); - -private: - - const function_dealloc_type m_dealloc ; ///< Deallocation - const function_verify_type m_verify ; ///< Result type verification - const function_single_type m_apply_single ; ///< Apply function - const function_team_type m_apply_team ; ///< Apply function - int volatile * const m_active_count ; ///< Count of active tasks on this policy - aligned_t m_qfeb ; ///< Qthreads full/empty bit - TaskMember ** const m_dep ; ///< Dependences - const int m_dep_capacity ; ///< Capacity of dependences - int m_dep_size ; ///< Actual count of dependences - int m_ref_count ; ///< Reference count - int m_state ; ///< State of the task - - TaskMember() /* = delete */ ; - TaskMember( const TaskMember & ) /* = delete */ ; - TaskMember & operator = ( const TaskMember & ) /* = delete */ ; - - static aligned_t qthread_func( void * arg ); - - static void * allocate( const unsigned arg_sizeof_derived , const unsigned arg_dependence_capacity ); - static void deallocate( void * ); - - void throw_error_add_dependence() const ; - static void throw_error_verify_type(); - - template < class DerivedTaskType > - static - void deallocate( TaskMember * t ) - { - DerivedTaskType * ptr = static_cast< DerivedTaskType * >(t); - ptr->~DerivedTaskType(); - deallocate( (void *) ptr ); - } - - void schedule(); - void closeout(); - -protected : - - ~TaskMember(); - - // Used by TaskMember< Qthreads , ResultType , void > - TaskMember( const function_verify_type arg_verify - , const function_dealloc_type arg_dealloc - , const function_single_type arg_apply_single - , const function_team_type arg_apply_team - , volatile int & arg_active_count - , const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity - ); - - // Used for TaskMember< Qthreads , void , void > - TaskMember( const function_dealloc_type arg_dealloc - , const function_single_type arg_apply_single - , const function_team_type arg_apply_team - , volatile int & arg_active_count - , const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity - ); - -public: - - template< typename ResultType > - KOKKOS_FUNCTION static - TaskMember * verify_type( TaskMember * t ) - { - enum { check_type = ! std::is_same< ResultType , void >::value }; - - if ( check_type && t != 0 ) { - - // Verify that t->m_verify is this function - const function_verify_type self = & TaskMember::template verify_type< ResultType > ; - - if ( t->m_verify != self ) { - t = 0 ; -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - throw_error_verify_type(); -#endif - } - } - return t ; - } - - //---------------------------------------- - /* Inheritence Requirements on task types: - * typedef FunctorType::value_type value_type ; - * class DerivedTaskType - * : public TaskMember< Qthreads , value_type , FunctorType > - * { ... }; - * class TaskMember< Qthreads , value_type , FunctorType > - * : public TaskMember< Qthreads , value_type , void > - * , public Functor - * { ... }; - * If value_type != void - * class TaskMember< Qthreads , value_type , void > - * : public TaskMember< Qthreads , void , void > - * - * Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ] - * - */ - - /** \brief Allocate and construct a single-thread task */ - template< class DerivedTaskType > - static - TaskMember * create_single( const typename DerivedTaskType::functor_type & arg_functor - , volatile int & arg_active_count - , const unsigned arg_dependence_capacity ) - { - typedef typename DerivedTaskType::functor_type functor_type ; - typedef typename functor_type::value_type value_type ; - - DerivedTaskType * const task = - new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) ) - DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType > - , & TaskMember::template apply_single< functor_type , value_type > - , 0 - , arg_active_count - , sizeof(DerivedTaskType) - , arg_dependence_capacity - , arg_functor ); - - return static_cast< TaskMember * >( task ); - } - - /** \brief Allocate and construct a team-thread task */ - template< class DerivedTaskType > - static - TaskMember * create_team( const typename DerivedTaskType::functor_type & arg_functor - , volatile int & arg_active_count - , const unsigned arg_dependence_capacity - , const bool arg_is_team ) - { - typedef typename DerivedTaskType::functor_type functor_type ; - typedef typename functor_type::value_type value_type ; - - const function_single_type flag = reinterpret_cast( arg_is_team ? 0 : 1 ); - - DerivedTaskType * const task = - new( allocate( sizeof(DerivedTaskType) , arg_dependence_capacity ) ) - DerivedTaskType( & TaskMember::template deallocate< DerivedTaskType > - , flag - , & TaskMember::template apply_team< functor_type , value_type > - , arg_active_count - , sizeof(DerivedTaskType) - , arg_dependence_capacity - , arg_functor ); - - return static_cast< TaskMember * >( task ); - } - - void respawn(); - void spawn() - { - m_state = Kokkos::Experimental::TASK_STATE_WAITING ; - schedule(); - } - - //---------------------------------------- - - typedef FutureValueTypeIsVoidError get_result_type ; - - KOKKOS_INLINE_FUNCTION - get_result_type get() const { return get_result_type() ; } - - KOKKOS_INLINE_FUNCTION - Kokkos::Experimental::TaskState get_state() const { return Kokkos::Experimental::TaskState( m_state ); } - - //---------------------------------------- - -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - static - void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ); -#else - KOKKOS_INLINE_FUNCTION static - void assign( TaskMember ** const lhs , TaskMember * const rhs , const bool no_throw = false ) {} -#endif - - KOKKOS_INLINE_FUNCTION - TaskMember * get_dependence( int i ) const - { return ( Kokkos::Experimental::TASK_STATE_EXECUTING == m_state && 0 <= i && i < m_dep_size ) ? m_dep[i] : (TaskMember*) 0 ; } - - KOKKOS_INLINE_FUNCTION - int get_dependence() const - { return m_dep_size ; } - - KOKKOS_INLINE_FUNCTION - void clear_dependence() - { - for ( int i = 0 ; i < m_dep_size ; ++i ) assign( m_dep + i , 0 ); - m_dep_size = 0 ; - } - - KOKKOS_INLINE_FUNCTION - void add_dependence( TaskMember * before ) - { - if ( ( Kokkos::Experimental::TASK_STATE_CONSTRUCTING == m_state || - Kokkos::Experimental::TASK_STATE_EXECUTING == m_state ) && - m_dep_size < m_dep_capacity ) { - assign( m_dep + m_dep_size , before ); - ++m_dep_size ; - } - else { - throw_error_add_dependence(); - } - } - - //---------------------------------------- - - template< class FunctorType , class ResultType > - KOKKOS_INLINE_FUNCTION static - void apply_single( typename std::enable_if< ! std::is_same< ResultType , void >::value , TaskMember * >::type t ) - { - typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; - - // TaskMember< Kokkos::Qthreads , ResultType , FunctorType > - // : public TaskMember< Kokkos::Qthreads , ResultType , void > - // , public FunctorType - // { ... }; - - derived_type & m = * static_cast< derived_type * >( t ); - - Kokkos::Impl::FunctorApply< FunctorType , void , ResultType & >::apply( (FunctorType &) m , & m.m_result ); - } - - template< class FunctorType , class ResultType > - KOKKOS_INLINE_FUNCTION static - void apply_single( typename std::enable_if< std::is_same< ResultType , void >::value , TaskMember * >::type t ) - { - typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; - - // TaskMember< Kokkos::Qthreads , ResultType , FunctorType > - // : public TaskMember< Kokkos::Qthreads , ResultType , void > - // , public FunctorType - // { ... }; - - derived_type & m = * static_cast< derived_type * >( t ); - - Kokkos::Impl::FunctorApply< FunctorType , void , void >::apply( (FunctorType &) m ); - } - - //---------------------------------------- - - template< class FunctorType , class ResultType > - KOKKOS_INLINE_FUNCTION static - void apply_team( typename std::enable_if< ! std::is_same< ResultType , void >::value , TaskMember * >::type t - , Kokkos::Impl::QthreadsTeamPolicyMember & member ) - { - typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; - - derived_type & m = * static_cast< derived_type * >( t ); - - m.FunctorType::apply( member , m.m_result ); - } - - template< class FunctorType , class ResultType > - KOKKOS_INLINE_FUNCTION static - void apply_team( typename std::enable_if< std::is_same< ResultType , void >::value , TaskMember * >::type t - , Kokkos::Impl::QthreadsTeamPolicyMember & member ) - { - typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; - - derived_type & m = * static_cast< derived_type * >( t ); - - m.FunctorType::apply( member ); - } -}; - -//---------------------------------------------------------------------------- -/** \brief Base class for tasks with a result value in the Qthreads execution space. - * - * The FunctorType must be void because this class is accessed by the - * Future class for the task and result value. - * - * Must be derived from TaskMember 'root class' so the Future class - * can correctly static_cast from the 'root class' to this class. - */ -template < class ResultType > -class TaskMember< Kokkos::Qthreads , ResultType , void > - : public TaskMember< Kokkos::Qthreads , void , void > -{ -public: - - ResultType m_result ; - - typedef const ResultType & get_result_type ; - - KOKKOS_INLINE_FUNCTION - get_result_type get() const { return m_result ; } - -protected: - - typedef TaskMember< Kokkos::Qthreads , void , void > task_root_type ; - typedef task_root_type::function_dealloc_type function_dealloc_type ; - typedef task_root_type::function_single_type function_single_type ; - typedef task_root_type::function_team_type function_team_type ; - - inline - TaskMember( const function_dealloc_type arg_dealloc - , const function_single_type arg_apply_single - , const function_team_type arg_apply_team - , volatile int & arg_active_count - , const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity - ) - : task_root_type( & task_root_type::template verify_type< ResultType > - , arg_dealloc - , arg_apply_single - , arg_apply_team - , arg_active_count - , arg_sizeof_derived - , arg_dependence_capacity ) - , m_result() - {} -}; - -template< class ResultType , class FunctorType > -class TaskMember< Kokkos::Qthreads , ResultType , FunctorType > - : public TaskMember< Kokkos::Qthreads , ResultType , void > - , public FunctorType -{ -public: - - typedef FunctorType functor_type ; - - typedef TaskMember< Kokkos::Qthreads , void , void > task_root_type ; - typedef TaskMember< Kokkos::Qthreads , ResultType , void > task_base_type ; - typedef task_root_type::function_dealloc_type function_dealloc_type ; - typedef task_root_type::function_single_type function_single_type ; - typedef task_root_type::function_team_type function_team_type ; - - inline - TaskMember( const function_dealloc_type arg_dealloc - , const function_single_type arg_apply_single - , const function_team_type arg_apply_team - , volatile int & arg_active_count - , const unsigned arg_sizeof_derived - , const unsigned arg_dependence_capacity - , const functor_type & arg_functor - ) - : task_base_type( arg_dealloc - , arg_apply_single - , arg_apply_team - , arg_active_count - , arg_sizeof_derived - , arg_dependence_capacity ) - , functor_type( arg_functor ) - {} -}; - -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Experimental { - -void wait( TaskPolicy< Kokkos::Qthreads > & ); - -template<> -class TaskPolicy< Kokkos::Qthreads > -{ -public: - - typedef Kokkos::Qthreads execution_space ; - typedef TaskPolicy execution_policy ; - typedef Kokkos::Impl::QthreadsTeamPolicyMember member_type ; - -private: - - typedef Impl::TaskMember< execution_space , void , void > task_root_type ; - - template< class FunctorType > - static inline - const task_root_type * get_task_root( const FunctorType * f ) - { - typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; - return static_cast< const task_root_type * >( static_cast< const task_type * >(f) ); - } - - template< class FunctorType > - static inline - task_root_type * get_task_root( FunctorType * f ) - { - typedef Impl::TaskMember< execution_space , typename FunctorType::value_type , FunctorType > task_type ; - return static_cast< task_root_type * >( static_cast< task_type * >(f) ); - } - - unsigned m_default_dependence_capacity ; - unsigned m_team_size ; - volatile int m_active_count_root ; - volatile int & m_active_count ; - -public: - - TaskPolicy - ( const unsigned arg_task_max_count - , const unsigned arg_task_max_size - , const unsigned arg_task_default_dependence_capacity = 4 - , const unsigned arg_task_team_size = 0 /* choose default */ - ); - - KOKKOS_FUNCTION TaskPolicy() = default ; - KOKKOS_FUNCTION TaskPolicy( TaskPolicy && rhs ) = default ; - KOKKOS_FUNCTION TaskPolicy( const TaskPolicy & rhs ) = default ; - KOKKOS_FUNCTION TaskPolicy & operator = ( TaskPolicy && rhs ) = default ; - KOKKOS_FUNCTION TaskPolicy & operator = ( const TaskPolicy & rhs ) = default ; - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - int allocated_task_count() const { return m_active_count ; } - - template< class ValueType > - const Future< ValueType , execution_space > & - spawn( const Future< ValueType , execution_space > & f - , const bool priority = false ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - f.m_task->spawn(); -#endif - return f ; - } - - // Create single-thread task - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - task_create( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) const - { - typedef typename FunctorType::value_type value_type ; - typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ; - return Future< value_type , execution_space >( -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - task_root_type::create_single< task_type > - ( functor - , m_active_count - , ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity ) - ) -#endif - ); - } - - template< class FunctorType > - Future< typename FunctorType::value_type , execution_space > - proc_create( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) const - { return task_create( functor , dependence_capacity ); } - - // Create thread-team task - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - task_create_team( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) const - { - typedef typename FunctorType::value_type value_type ; - typedef Impl::TaskMember< execution_space , value_type , FunctorType > task_type ; - - return Future< value_type , execution_space >( -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - task_root_type::create_team< task_type > - ( functor - , m_active_count - , ( ~0u == dependence_capacity ? m_default_dependence_capacity : dependence_capacity ) - , 1 < m_team_size - ) -#endif - ); - } - - template< class FunctorType > - KOKKOS_INLINE_FUNCTION - Future< typename FunctorType::value_type , execution_space > - proc_create_team( const FunctorType & functor - , const unsigned dependence_capacity = ~0u ) const - { return task_create_team( functor , dependence_capacity ); } - - // Add dependence - template< class A1 , class A2 , class A3 , class A4 > - void add_dependence( const Future & after - , const Future & before - , typename std::enable_if - < std::is_same< typename Future::execution_space , execution_space >::value - && - std::is_same< typename Future::execution_space , execution_space >::value - >::type * = 0 - ) - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - after.m_task->add_dependence( before.m_task ); -#endif - } - - //---------------------------------------- - // Functions for an executing task functor to query dependences, - // set new dependences, and respawn itself. - - template< class FunctorType > - Future< void , execution_space > - get_dependence( const FunctorType * task_functor , int i ) const - { - return Future( -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - get_task_root(task_functor)->get_dependence(i) -#endif - ); - } - - template< class FunctorType > - int get_dependence( const FunctorType * task_functor ) const -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return get_task_root(task_functor)->get_dependence(); } -#else - { return 0 ; } -#endif - - template< class FunctorType > - void clear_dependence( FunctorType * task_functor ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - get_task_root(task_functor)->clear_dependence(); -#endif - } - - template< class FunctorType , class A3 , class A4 > - void add_dependence( FunctorType * task_functor - , const Future & before - , typename std::enable_if - < std::is_same< typename Future::execution_space , execution_space >::value - >::type * = 0 - ) - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - get_task_root(task_functor)->add_dependence( before.m_task ); -#endif - } - - template< class FunctorType > - void respawn( FunctorType * task_functor - , const bool priority = false ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - get_task_root(task_functor)->respawn(); -#endif - } - - template< class FunctorType > - void respawn_needing_memory( FunctorType * task_functor ) const - { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - get_task_root(task_functor)->respawn(); -#endif - } - - static member_type & member_single(); - - friend void wait( TaskPolicy< Kokkos::Qthreads > & ); -}; - -} /* namespace Experimental */ -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ -#endif /* #define KOKKOS_QTHREADS_TASK_HPP */ - diff --git a/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp b/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp deleted file mode 100644 index b9b6dd86e41..00000000000 --- a/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp +++ /dev/null @@ -1,321 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_QTHREADS_TASKQUEUE_HPP -#define KOKKOS_QTHREADS_TASKQUEUE_HPP - -#include -#if defined(KOKKOS_ENABLE_QTHREADS) && defined(KOKKOS_ENABLE_TASKPOLICY) - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -/** \brief Manage task allocation, deallocation, and scheduling. - * - * Task execution is handled here directly for the Qthread implementation. - */ -template <> -class TaskQueue { - private: - using execution_space = Kokkos::Qthread; - using memory_space = Kokkos::HostSpace using device_type = - Kokkos::Device; - using memory_pool = Kokkos::MemoryPool; - using task_root_type = Kokkos::Impl::TaskBase; - - friend class Kokkos::TaskScheduler; - - struct Destroy { - TaskQueue* m_queue; - void destroy_shared_allocation(); - }; - - //---------------------------------------- - - enum : int { - TASK_STATE_NULL = 0, ///< Does not exist - TASK_STATE_CONSTRUCTING = 1, ///< Is under construction - TASK_STATE_WAITING = 2, ///< Is waiting for execution - TASK_STATE_EXECUTING = 4, ///< Is executing - TASK_STATE_RESPAWN = 8, ///< Requested respawn - TASK_STATE_COMPLETE = 16 ///< Execution is complete - }; - - // Queue is organized as [ priority ][ type ] - - memory_pool m_memory; - unsigned m_team_size; // Number of threads in a team - long m_accum_alloc; // Accumulated number of allocations - int m_count_alloc; // Current number of allocations - int m_max_alloc; // Maximum number of allocations - int m_ready_count; // Number of ready or executing - - //---------------------------------------- - - ~TaskQueue(); - TaskQueue() = delete; - TaskQueue(TaskQueue&&) = delete; - TaskQueue(TaskQueue const&) = delete; - TaskQueue& operator=(TaskQueue&&) = delete; - TaskQueue& operator=(TaskQueue const&) = delete; - - TaskQueue(const memory_space& arg_space, - unsigned const arg_memory_pool_capacity, - unsigned const arg_memory_pool_superblock_capacity_log2); - - // Schedule a task - // Precondition: - // task is not executing - // task->m_next is the dependence or zero - // Postcondition: - // task->m_next is linked list membership - KOKKOS_FUNCTION - void schedule(task_root_type* const); - - // Reschedule a task - // Precondition: - // task is in Executing state - // task->m_next == LockTag - // Postcondition: - // task is in Executing-Respawn state - // task->m_next == 0 (no dependence) - KOKKOS_FUNCTION - void reschedule(task_root_type*); - - // Complete a task - // Precondition: - // task is not executing - // task->m_next == LockTag => task is complete - // task->m_next != LockTag => task is respawn - // Postcondition: - // task->m_wait == LockTag => task is complete - // task->m_wait != LockTag => task is waiting - KOKKOS_FUNCTION - void complete(task_root_type*); - - public: - // If and only if the execution space is a single thread - // then execute ready tasks. - KOKKOS_INLINE_FUNCTION - void iff_single_thread_recursive_execute() { -#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - specialization::iff_single_thread_recursive_execute(this); -#endif - } - - void execute() { specialization::execute(this); } - - template - void proc_set_apply(typename task_root_type::function_type* ptr) { - specialization::template proc_set_apply(ptr); - } - - // Assign task pointer with reference counting of assigned tasks - template - KOKKOS_FUNCTION static void assign( - TaskBase** const lhs, - TaskBase* const rhs) { - using task_lhs = TaskBase; -#if 0 - { - printf( "assign( 0x%lx { 0x%lx %d %d }, 0x%lx { 0x%lx %d %d } )\n", - uintptr_t( lhs ? *lhs : 0 ), - uintptr_t( lhs && *lhs ? (*lhs)->m_next : 0 ), - int( lhs && *lhs ? (*lhs)->m_task_type : 0 ), - int( lhs && *lhs ? (*lhs)->m_ref_count : 0 ), - uintptr_t(rhs), - uintptr_t( rhs ? rhs->m_next : 0 ), - int( rhs ? rhs->m_task_type : 0 ), - int( rhs ? rhs->m_ref_count : 0 ) - ); - fflush( stdout ); - } -#endif - - if (*lhs) { - const int count = Kokkos::atomic_fetch_add(&((*lhs)->m_ref_count), -1); - - if ((1 == count) && ((*lhs)->m_state == TASK_STATE_COMPLETE)) { - // Reference count is zero and task is complete, deallocate. - (*lhs)->m_queue->deallocate(*lhs, (*lhs)->m_alloc_size); - } else if (count <= 1) { - Kokkos::abort( - "TaskScheduler task has negative reference count or is incomplete"); - } - - // GEM: Should I check that there are no dependences here? Can the state - // be set to complete while there are still dependences? - } - - if (rhs) { - Kokkos::atomic_fetch_add(&(rhs->m_ref_count), 1); - } - - // Force write of *lhs - - *static_cast(lhs) = rhs; - - Kokkos::memory_fence(); - } - - KOKKOS_FUNCTION - size_t allocate_block_size(size_t n); ///< Actual block size allocated - - KOKKOS_FUNCTION - void* allocate(size_t n); ///< Allocate from the memory pool - - KOKKOS_FUNCTION - void deallocate(void* p, size_t n); ///< Deallocate to the memory pool -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template <> -class TaskBase { - public: - enum : int16_t { - TaskTeam = TaskBase::TaskTeam, - TaskSingle = TaskBase::TaskSingle, - Aggregate = TaskBase::Aggregate - }; - - enum : uintptr_t { - LockTag = TaskBase::LockTag, - EndTag = TaskBase::EndTag - }; - - using execution_space = Kokkos::Qthread; - using queue_type = TaskQueue; - - template - friend class Kokkos::TaskScheduler; - - typedef void (*function_type)(TaskBase*, void*); - - // sizeof(TaskBase) == 48 - - function_type m_apply; ///< Apply function pointer - queue_type* m_queue; ///< Queue in which this task resides - TaskBase* m_dep; ///< Dependence - int32_t m_ref_count; ///< Reference count - int32_t m_alloc_size; ///< Allocation size - int32_t m_dep_count; ///< Aggregate's number of dependences - int16_t m_task_type; ///< Type of task - int16_t m_priority; ///< Priority of runnable task - aligned_t m_qfeb; ///< Qthread full/empty bit - int m_state; ///< State of the task - - TaskBase(TaskBase&&) = delete; - TaskBase(const TaskBase&) = delete; - TaskBase& operator=(TaskBase&&) = delete; - TaskBase& operator=(const TaskBase&) = delete; - - KOKKOS_INLINE_FUNCTION ~TaskBase() = default; - - KOKKOS_INLINE_FUNCTION - constexpr TaskBase() noexcept - : m_apply(0), - m_queue(0), - m_dep(0), - m_ref_count(0), - m_alloc_size(0), - m_dep_count(0), - m_task_type(TaskSingle), - m_priority(1 /* TaskRegularPriority */), - m_qfeb(0), - m_state(queue_type::TASK_STATE_CONSTRUCTING) { - qthread_empty(&m_qfeb); // Set to full when complete - } - - //---------------------------------------- - - static aligned_t qthread_func(void* arg); - - KOKKOS_INLINE_FUNCTION - TaskBase** aggregate_dependences() { - return reinterpret_cast(this + 1); - } - - KOKKOS_INLINE_FUNCTION - void requested_respawn() { return m_state == queue_type::TASK_STATE_RESPAWN; } - - KOKKOS_INLINE_FUNCTION - void add_dependence(TaskBase* dep) { - // Assign dependence to m_dep. It will be processed in the subsequent - // call to schedule. Error if the dependence is reset. - if (0 != Kokkos::atomic_exchange(&m_dep, dep)) { - Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); - } - - if (0 != dep) { - // The future may be destroyed upon returning from this call - // so increment reference count to track this assignment. - Kokkos::atomic_fetch_add(&(dep->m_ref_count), 1); - } - } - - using get_return_type = void; - - KOKKOS_INLINE_FUNCTION - get_return_type get() const {} -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ -#endif // KOKKOS_QTHREADS_TASKQUEUE_HPP diff --git a/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp b/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp deleted file mode 100644 index 7a91103ae77..00000000000 --- a/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp +++ /dev/null @@ -1,398 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_QTHREADS_TASKQUEUE_IMPL_HPP -#define KOKKOS_QTHREADS_TASKQUEUE_IMPL_HPP - -#include -#if defined(KOKKOS_ENABLE_QTHREADS) && defined(KOKKOS_ENABLE_TASKPOLICY) - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -template -void TaskQueue::Destroy::destroy_shared_allocation() { - m_queue->~TaskQueue(); -} - -//---------------------------------------------------------------------------- - -template -TaskQueue::TaskQueue( - const TaskQueue::memory_space &arg_space, - unsigned const arg_memory_pool_capacity, - unsigned const arg_memory_pool_superblock_capacity_log2) - : m_memory(arg_space, arg_memory_pool_capacity, - arg_memory_pool_superblock_capacity_log2) - m_team_size(unsigned(qthread_num_workers_local(NO_SHEPHERD))), - m_accum_alloc(0), - m_count_alloc(0), - m_max_alloc(0), - m_ready_count(0) {} - -//---------------------------------------------------------------------------- - -template -TaskQueue::~TaskQueue() { - // Verify that ready count is zero. - if (0 != m_ready_count) { - Kokkos::abort("TaskQueue::~TaskQueue ERROR: has ready or executing tasks"); - } -} - -//---------------------------------------------------------------------------- - -template -KOKKOS_FUNCTION size_t TaskQueue::allocate_block_size(size_t n) { - return m_memory.allocate_block_size(n); -} - -//---------------------------------------------------------------------------- - -template -KOKKOS_FUNCTION void *TaskQueue::allocate(size_t n) { - void *const p = m_memory.allocate(n); - - if (p) { - Kokkos::atomic_increment(&m_accum_alloc); - Kokkos::atomic_increment(&m_count_alloc); - - if (m_max_alloc < m_count_alloc) m_max_alloc = m_count_alloc; - } - - return p; -} - -//---------------------------------------------------------------------------- - -template -KOKKOS_FUNCTION void TaskQueue::deallocate(void *p, size_t n) { - m_memory.deallocate(p, n); - Kokkos::atomic_decrement(&m_count_alloc); -} - -//---------------------------------------------------------------------------- - -template -KOKKOS_FUNCTION void TaskQueue::schedule( - TaskQueue::task_root_type *const task) { -#if 0 - printf( "schedule( 0x%lx { %d %d %d }\n", - uintptr_t(task), - task->m_task_type, - task->m_priority, - task->m_ref_count ); -#endif - - // The task has been constructed and is waiting to be executed. - task->m_state = TASK_STATE_WAITING; - - if (task->m_task_type != task_root_type::Aggregate) { - // Scheduling a single or team task. - - // Increment active task count before spawning. - Kokkos::atomic_increment(m_ready_count); - - if (task->m_dep == 0) { - // Schedule a task with no dependences. - - if (task_root_type::TaskTeam == task->m_task_type && m_team_size > 1) { - // If more than one shepherd spawn on a shepherd other than this - // shepherd - const int num_shepherd = qthread_num_shepherds(); - const int this_shepherd = qthread_shep(); - int spawn_shepherd = (this_shepherd + 1) % num_shepherd; - -#if 0 - fprintf( stdout, - "worker(%d.%d) task 0x%.12lx spawning on shepherd(%d) clone(%d)\n", - qthread_shep(), - qthread_worker_local(NULL), - reinterpret_cast(this), - spawn_shepherd, - m_team_size - 1 - ); - fflush(stdout); -#endif - - qthread_spawn_cloneable( - &task_root_type::qthread_func, task, 0, NULL, - 0, // no depenedences - 0, // dependences array - spawn_shepherd, - unsigned(QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY), - m_team_size - 1); - } else { - qthread_spawn( - &task_root_type::qthread_func, task, 0, NULL, - 0, // no depenedences - 0, // dependences array - NO_SHEPHERD, - QTHREAD_SPAWN_SIMPLE /* allows optimization for non-blocking task */ - ); - } - } else if (task->m_dep->m_task_type != task_root_type::Aggregate) - // Malloc the precondition array to pass to qthread_spawn(). For - // non-aggregate tasks, it is a single pointer since there are no - // dependences. Qthreads will eventually free this allocation so memory - // will not be leaked. Is malloc thread-safe? Should this call be - // guarded? The memory can't be allocated from the pool allocator because - // Qthreads frees it using free(). - aligned_t **qprecon = (aligned_t **)malloc(sizeof(aligned_t *)); - - *qprecon = reinterpret_cast(uintptr_t(m_dep_size)); - - if (task->m_task_type == task_root_type::TaskTeam && m_team_size > 1) { - // If more than one shepherd spawn on a shepherd other than this shepherd - const int num_shepherd = qthread_num_shepherds(); - const int this_shepherd = qthread_shep(); - int spawn_shepherd = (this_shepherd + 1) % num_shepherd; - -#if 0 - fprintf( stdout, - "worker(%d.%d) task 0x%.12lx spawning on shepherd(%d) clone(%d)\n", - qthread_shep(), - qthread_worker_local(NULL), - reinterpret_cast(this), - spawn_shepherd, - m_team_size - 1 - ); - fflush(stdout); -#endif - - qthread_spawn_cloneable( - &Task::qthread_func, this, 0, NULL, m_dep_size, - qprecon, /* dependences */ - spawn_shepherd, - unsigned(QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY), - m_team_size - 1); - } else { - qthread_spawn( - &Task::qthread_func, /* function */ - this, /* function argument */ - 0, NULL, m_dep_size, qprecon, /* dependences */ - NO_SHEPHERD, - QTHREAD_SPAWN_SIMPLE /* allows optimization for non-blocking task */ - ); - } - } else { - // GEM: How do I handle an aggregate (when_all) task? - } -} - -//---------------------------------------------------------------------------- - -template -KOKKOS_FUNCTION void TaskQueue::reschedule(task_root_type *task) { - // Precondition: - // task is in Executing state - // task->m_next == LockTag - // - // Postcondition: - // task is in Executing-Respawn state - // task->m_next == 0 (no dependence) - - task_root_type *const zero = (task_root_type *)0; - task_root_type *const lock = (task_root_type *)task_root_type::LockTag; - - if (lock != Kokkos::atomic_exchange(&task->m_next, zero)) { - Kokkos::abort("TaskScheduler::respawn ERROR: already respawned"); - } -} - -//---------------------------------------------------------------------------- - -template -KOKKOS_FUNCTION void TaskQueue::complete( - TaskQueue::task_root_type *task) { - // Complete a runnable task that has finished executing - // or a when_all task when all of its dependeneces are complete. - - task_root_type *const zero = (task_root_type *)0; - task_root_type *const lock = (task_root_type *)task_root_type::LockTag; - task_root_type *const end = (task_root_type *)task_root_type::EndTag; - -#if 0 - printf( "complete( 0x%lx { 0x%lx 0x%lx %d %d %d }\n", - uintptr_t(task), - uintptr_t(task->m_wait), - uintptr_t(task->m_next), - task->m_task_type, - task->m_priority, - task->m_ref_count - ); - fflush( stdout ); -#endif - - const bool runnable = task_root_type::Aggregate != task->m_task_type; - - //---------------------------------------- - - if (runnable && lock != task->m_next) { - // Is a runnable task has finished executing and requested respawn. - // Schedule the task for subsequent execution. - - schedule(task); - } - //---------------------------------------- - else { - // Is either an aggregate or a runnable task that executed - // and did not respawn. Transition this task to complete. - - // If 'task' is an aggregate then any of the runnable tasks that - // it depends upon may be attempting to complete this 'task'. - // Must only transition a task once to complete status. - // This is controled by atomically locking the wait queue. - - // Stop other tasks from adding themselves to this task's wait queue - // by locking the head of this task's wait queue. - - task_root_type *x = Kokkos::atomic_exchange(&task->m_wait, lock); - - if (x != (task_root_type *)lock) { - // This thread has transitioned this 'task' to complete. - // 'task' is no longer in a queue and is not executing - // so decrement the reference count from 'task's creation. - // If no other references to this 'task' then it will be deleted. - - TaskQueue::assign(&task, zero); - - // This thread has exclusive access to the wait list so - // the concurrency-safe pop_task function is not needed. - // Schedule the tasks that have been waiting on the input 'task', - // which may have been deleted. - - while (x != end) { - // Set x->m_next = zero <= no dependence - - task_root_type *const next = - (task_root_type *)Kokkos::atomic_exchange(&x->m_next, zero); - - schedule(x); - - x = next; - } - } - } - - if (runnable) { - // A runnable task was popped from a ready queue and executed. - // If respawned into a ready queue then the ready count was incremented - // so decrement whether respawned or not. - Kokkos::atomic_decrement(&m_ready_count); - } -} - -//---------------------------------------------------------------------------- - -template <> -aligned_t TaskBase::qthread_func(void *arg) { - using execution_space = Kokkos::Qthreads; - using task_root_type = TaskBase; - using Member = Kokkos::Impl::QthreadsTeamPolicyMember; - - task_root_type *const task = reinterpret_cast(arg); - - // First member of the team change state to executing. - // Use compare-exchange to avoid race condition with a respawn. - Kokkos::atomic_compare_exchange_strong(&task->m_state, - queue_type::TASK_STATE_WAITING, - queue_type::TASK_STATE_EXECUTING); - - if (task_root_type::TaskTeam == task->m_task_type) { - if (1 < task->m_queue->m_team_size) { - // Team task with team size of more than 1. - Member::TaskTeam task_team_tag; - - // Initialize team size and rank with shephered info - Member member(task_team_tag); - - (*task->m_apply)(task, &member); - -#if 0 - fprintf( stdout, - "worker(%d.%d) task 0x%.12lx executed by member(%d:%d)\n", - qthread_shep(), - qthread_worker_local(NULL), - reinterpret_cast(task), - member.team_rank(), - member.team_size() - ); - fflush(stdout); -#endif - - member.team_barrier(); - if (member.team_rank() == 0) task->closeout(); - member.team_barrier(); - } else { - // Team task with team size of 1. - Member member; - (*task->m_apply)(task, &member); - task->closeout(); - } - } else { - (*task->m_apply)(task); - task->closeout(); - } - -#if 0 -fprintf( stdout - , "worker(%d.%d) task 0x%.12lx return\n" - , qthread_shep() - , qthread_worker_local(NULL) - , reinterpret_cast(task) - ); -fflush(stdout); -#endif - - return 0; -} - -} /* namespace Impl */ -} /* namespace Kokkos */ - -#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ -#endif // KOKKOS_QTHREADS_TASKQUEUE_IMPL_HPP diff --git a/core/src/Qthreads/README b/core/src/Qthreads/README deleted file mode 100644 index e35b1f698ec..00000000000 --- a/core/src/Qthreads/README +++ /dev/null @@ -1,24 +0,0 @@ - -# This Qthreads back-end uses an experimental branch of the Qthreads repository with special #define options. - -# Cloning repository and branch: - -git clone git@github.com:Qthreads/qthreads.git qthreads - -cd qthreads - -# checkout branch with "cloned tasks" - -git checkout dev-kokkos - -# Configure/autogen - -sh autogen.sh - -# configure with 'hwloc' installation: - -./configure CFLAGS="-DCLONED_TASKS -DQTHREAD_LOCAL_PRIORITY" --with-hwloc=${HWLOCDIR} --prefix=${INSTALLDIR} - -# install - -make install diff --git a/core/src/ROCm/Kokkos_ROCm_Parallel.hpp b/core/src/ROCm/Kokkos_ROCm_Parallel.hpp index 4ebcdc6f958..8a4d8c07d0b 100644 --- a/core/src/ROCm/Kokkos_ROCm_Parallel.hpp +++ b/core/src/ROCm/Kokkos_ROCm_Parallel.hpp @@ -293,7 +293,6 @@ class TeamPolicyInternal } KOKKOS_INLINE_FUNCTION int team_size() const { return (m_team_size > 0) ? m_team_size : Impl::get_max_tile_thread(); - ; } KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size; } @@ -837,7 +836,7 @@ class ParallelFor, if (total_size == 0) return; const auto shared_size = FunctorTeamShmemSize::value(f, team_size); - char* scratch = NULL; + char* scratch = nullptr; char* shared = (char*)rocm_device_allocate(shared_size * league_size + scratch_size0 * league_size); if (0 < scratch_size1) @@ -889,7 +888,7 @@ class ParallelReduce, ReducerType, const FunctorType& f, const Policy& policy, const ViewType& result_view, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void*>::type = NULL) { + void*>::type = nullptr) { typedef typename Policy::work_tag Tag; typedef Kokkos::Impl::FunctorValueTraits ValueTraits; typedef Kokkos::Impl::FunctorValueInit ValueInit; @@ -1106,7 +1105,7 @@ class ParallelReduce, ReducerType, ParallelReduce(const FunctorType& arg_functor, const Policy& arg_policy, const HostViewType& arg_result, typename std::enable_if::value, - void*>::type = NULL) + void*>::type = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), @@ -1149,7 +1148,7 @@ class ParallelReduce, ReducerType, const FunctorType& f, const Policy& policy, const ViewType& result_view, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void*>::type = NULL) { + void*>::type = nullptr) { const int league_size = policy.league_size(); const int team_size = policy.team_size(f); const int vector_length = policy.vector_length(); @@ -1172,7 +1171,7 @@ class ParallelReduce, ReducerType, FunctorTeamShmemSize::value(f, team_size); char* shared; - char* scratch = NULL; + char* scratch = nullptr; shared = (char*)rocm_device_allocate(league_size * (shared_size + scratch_size0)); @@ -1223,7 +1222,7 @@ class ParallelReduce, ReducerType, const int scratch_size1 = policy.scratch_size(1, team_size); char* shared; - char* scratch = NULL; + char* scratch = nullptr; shared = (char*)rocm_device_allocate((shared_size + scratch_size0) * league_size); if (0 < scratch_size1) diff --git a/core/src/ROCm/Kokkos_ROCm_Space.cpp b/core/src/ROCm/Kokkos_ROCm_Space.cpp index 501567406dd..1a79425f493 100644 --- a/core/src/ROCm/Kokkos_ROCm_Space.cpp +++ b/core/src/ROCm/Kokkos_ROCm_Space.cpp @@ -409,7 +409,7 @@ void* SharedAllocationRecord:: allocate_tracked(const Kokkos::Experimental::ROCmSpace& arg_space, const std::string& arg_alloc_label, const size_t arg_alloc_size) { - if (!arg_alloc_size) return (void*)0; + if (!arg_alloc_size) return nullptr; SharedAllocationRecord* const r = allocate(arg_space, arg_alloc_label, arg_alloc_size); @@ -621,7 +621,7 @@ namespace Kokkos { namespace { void* rocm_resize_scratch_space(size_t bytes, bool force_shrink) { - static void* ptr = NULL; + static void* ptr = nullptr; static size_t current_size = 0; if (current_size == 0) { current_size = bytes; diff --git a/core/src/ROCm/Kokkos_ROCm_Tile.hpp b/core/src/ROCm/Kokkos_ROCm_Tile.hpp index da95064eea8..3d80b4d4409 100644 --- a/core/src/ROCm/Kokkos_ROCm_Tile.hpp +++ b/core/src/ROCm/Kokkos_ROCm_Tile.hpp @@ -293,8 +293,9 @@ struct tile_buffer { } template - typename Impl::enable_if<(sizeof(Q) <= 8), void>::type action_at( - std::size_t i, Action a) [[hc]] { + typename std::enable_if<(sizeof(Q) <= 8), void>::type action_at(std::size_t i, + Action a) + [[hc]] { element_type* value = (*this)[i]; #if defined(ROCM15) a(value); @@ -316,7 +317,7 @@ struct tile_buffer { } template - typename Impl::enable_if::type action_at( + typename std::enable_if::type action_at( std::size_t i, Action a) [[hc]] { element_type* value = (*this)[i]; #if defined(ROCM15) diff --git a/core/src/ROCm/Kokkos_ROCm_Vectorization.hpp b/core/src/ROCm/Kokkos_ROCm_Vectorization.hpp index 157c15695b0..ae52f8da346 100644 --- a/core/src/ROCm/Kokkos_ROCm_Vectorization.hpp +++ b/core/src/ROCm/Kokkos_ROCm_Vectorization.hpp @@ -163,7 +163,7 @@ float shfl(const float& val, const int& srcLane, const int& width) { template KOKKOS_INLINE_FUNCTION Scalar shfl(const Scalar& val, const int& srcLane, - const typename Impl::enable_if<(sizeof(Scalar) == 4), int>::type& width) { + const typename std::enable_if<(sizeof(Scalar) == 4), int>::type& width) { Scalar tmp1 = val; float tmp = *reinterpret_cast(&tmp1); tmp = __shfl(tmp, srcLane, width); @@ -182,7 +182,7 @@ double shfl(const double& val, const int& srcLane, const int& width) { template KOKKOS_INLINE_FUNCTION Scalar shfl(const Scalar& val, const int& srcLane, - const typename Impl::enable_if<(sizeof(Scalar) == 8), int>::type& width) { + const typename std::enable_if<(sizeof(Scalar) == 8), int>::type& width) { int lo = __double2loint(*reinterpret_cast(&val)); int hi = __double2hiint(*reinterpret_cast(&val)); lo = __shfl(lo, srcLane, width); @@ -194,7 +194,7 @@ shfl(const Scalar& val, const int& srcLane, template KOKKOS_INLINE_FUNCTION Scalar shfl(const Scalar& val, const int& srcLane, - const typename Impl::enable_if<(sizeof(Scalar) > 8), int>::type& width) { + const typename std::enable_if<(sizeof(Scalar) > 8), int>::type& width) { Impl::shfl_union s_val; Impl::shfl_union r_val; s_val = val; @@ -217,7 +217,7 @@ float shfl_down(const float& val, const int& delta, const int& width) { template KOKKOS_INLINE_FUNCTION Scalar shfl_down( const Scalar& val, const int& delta, - const typename Impl::enable_if<(sizeof(Scalar) == 4), int>::type& width) { + const typename std::enable_if<(sizeof(Scalar) == 4), int>::type& width) { Scalar tmp1 = val; float tmp = *reinterpret_cast(&tmp1); tmp = __shfl_down(tmp, delta, width); @@ -245,7 +245,7 @@ double shfl_down(const double& val, const int& delta, const int& width) { template KOKKOS_INLINE_FUNCTION Scalar shfl_down( const Scalar& val, const int& delta, - const typename Impl::enable_if<(sizeof(Scalar) == 8), int>::type& width) { + const typename std::enable_if<(sizeof(Scalar) == 8), int>::type& width) { int lo = __double2loint(*reinterpret_cast(&val)); int hi = __double2hiint(*reinterpret_cast(&val)); lo = __shfl_down(lo, delta, width); @@ -257,7 +257,7 @@ KOKKOS_INLINE_FUNCTION Scalar shfl_down( template KOKKOS_INLINE_FUNCTION Scalar shfl_down( const Scalar& val, const int& delta, - const typename Impl::enable_if<(sizeof(Scalar) > 8), int>::type& width) { + const typename std::enable_if<(sizeof(Scalar) > 8), int>::type& width) { Impl::shfl_union s_val; Impl::shfl_union r_val; s_val = val; @@ -280,7 +280,7 @@ float shfl_up(const float& val, const int& delta, const int& width) { template KOKKOS_INLINE_FUNCTION Scalar shfl_up( const Scalar& val, const int& delta, - const typename Impl::enable_if<(sizeof(Scalar) == 4), int>::type& width) { + const typename std::enable_if<(sizeof(Scalar) == 4), int>::type& width) { Scalar tmp1 = val; float tmp = *reinterpret_cast(&tmp1); tmp = __shfl_up(tmp, delta, width); @@ -299,7 +299,7 @@ double shfl_up(const double& val, const int& delta, const int& width) { template KOKKOS_INLINE_FUNCTION Scalar shfl_up( const Scalar& val, const int& delta, - const typename Impl::enable_if<(sizeof(Scalar) == 8), int>::type& width) { + const typename std::enable_if<(sizeof(Scalar) == 8), int>::type& width) { int lo = __double2loint(*reinterpret_cast(&val)); int hi = __double2hiint(*reinterpret_cast(&val)); lo = __shfl_up(lo, delta, width); @@ -309,9 +309,9 @@ KOKKOS_INLINE_FUNCTION Scalar shfl_up( } template -KOKKOS_INLINE_FUNCTION Scalar shfl_up( - const Scalar& val, const int& delta, - const typename Impl::enable_if<(sizeof(Scalar) > 8), int>::type& width) { +KOKKOS_INLINE_FUNCTION Scalar +shfl_up(const Scalar& val, const int& delta, + const typename std::enable_if<(sizeof(Scalar) > 8), int>::type& width) { Impl::shfl_union s_val; Impl::shfl_union r_val; s_val = val; diff --git a/core/src/Threads/Kokkos_ThreadsExec.cpp b/core/src/Threads/Kokkos_ThreadsExec.cpp index 33df9e20147..7adfd127de4 100644 --- a/core/src/Threads/Kokkos_ThreadsExec.cpp +++ b/core/src/Threads/Kokkos_ThreadsExec.cpp @@ -65,7 +65,7 @@ namespace Impl { namespace { ThreadsExec s_threads_process; -ThreadsExec *s_threads_exec[ThreadsExec::MAX_THREAD_COUNT] = {0}; +ThreadsExec *s_threads_exec[ThreadsExec::MAX_THREAD_COUNT] = {nullptr}; pthread_t s_threads_pid[ThreadsExec::MAX_THREAD_COUNT] = {0}; std::pair s_threads_coord[ThreadsExec::MAX_THREAD_COUNT]; @@ -75,11 +75,9 @@ unsigned s_current_reduce_size = 0; unsigned s_current_shared_size = 0; void (*volatile s_current_function)(ThreadsExec &, const void *); -const void *volatile s_current_function_arg = 0; +const void *volatile s_current_function_arg = nullptr; struct Sentinel { - Sentinel() {} - ~Sentinel() { if (s_thread_pool_size[0] || s_thread_pool_size[1] || s_thread_pool_size[2] || s_current_reduce_size || @@ -129,8 +127,8 @@ void ThreadsExec::driver(void) { } ThreadsExec::ThreadsExec() - : m_pool_base(0), - m_scratch(0), + : m_pool_base(nullptr), + m_scratch(nullptr), m_scratch_reduce_end(0), m_scratch_thread_end(0), m_numa_rank(0), @@ -142,7 +140,7 @@ ThreadsExec::ThreadsExec() if (&s_threads_process != this) { // A spawned thread - ThreadsExec *const nil = 0; + ThreadsExec *const nil = nullptr; // Which entry in 's_threads_exec', possibly determined from hwloc binding const int entry = @@ -192,12 +190,12 @@ ThreadsExec::~ThreadsExec() { if (m_scratch) { Record *const r = Record::get_record(m_scratch); - m_scratch = 0; + m_scratch = nullptr; Record::decrement(r); } - m_pool_base = 0; + m_pool_base = nullptr; m_scratch_reduce_end = 0; m_scratch_thread_end = 0; m_numa_rank = 0; @@ -209,7 +207,7 @@ ThreadsExec::~ThreadsExec() { m_pool_state = ThreadsExec::Terminating; if (&s_threads_process != this && entry < MAX_THREAD_COUNT) { - ThreadsExec *const nil = 0; + ThreadsExec *const nil = nullptr; atomic_compare_exchange(s_threads_exec + entry, this, nil); @@ -223,13 +221,13 @@ ThreadsExec *ThreadsExec::get_thread(const int init_thread_rank) { ThreadsExec *const th = init_thread_rank < s_thread_pool_size[0] ? s_threads_exec[s_thread_pool_size[0] - (init_thread_rank + 1)] - : 0; + : nullptr; - if (0 == th || th->m_pool_rank != init_thread_rank) { + if (nullptr == th || th->m_pool_rank != init_thread_rank) { std::ostringstream msg; msg << "Kokkos::Impl::ThreadsExec::get_thread ERROR : " << "thread " << init_thread_rank << " of " << s_thread_pool_size[0]; - if (0 == th) { + if (nullptr == th) { msg << " does not exist"; } else { msg << " has wrong thread_rank " << th->m_pool_rank; @@ -299,8 +297,8 @@ void ThreadsExec::fence() { ThreadsExec::Active); } - s_current_function = 0; - s_current_function_arg = 0; + s_current_function = nullptr; + s_current_function_arg = nullptr; // Make sure function and arguments are cleared before // potentially re-activating threads with a subsequent launch. @@ -364,7 +362,7 @@ bool ThreadsExec::wake() { ThreadsExec::global_unlock(); if (s_threads_process.m_pool_base) { - execute_sleep(s_threads_process, 0); + execute_sleep(s_threads_process, nullptr); s_threads_process.m_pool_state = ThreadsExec::Inactive; } @@ -394,12 +392,12 @@ void ThreadsExec::execute_serial(void (*func)(ThreadsExec &, const void *)) { if (s_threads_process.m_pool_base) { s_threads_process.m_pool_state = ThreadsExec::Active; - (*func)(s_threads_process, 0); + (*func)(s_threads_process, nullptr); s_threads_process.m_pool_state = ThreadsExec::Inactive; } - s_current_function_arg = 0; - s_current_function = 0; + s_current_function_arg = nullptr; + s_current_function = nullptr; // Make sure function and arguments are cleared before proceeding. memory_fence(); @@ -417,7 +415,7 @@ void ThreadsExec::execute_resize_scratch(ThreadsExec &exec, const void *) { if (exec.m_scratch) { Record *const r = Record::get_record(exec.m_scratch); - exec.m_scratch = 0; + exec.m_scratch = nullptr; Record::decrement(r); } @@ -508,7 +506,7 @@ void ThreadsExec::print_configuration(std::ostream &s, const bool detail) { s << " threads[" << s_thread_pool_size[0] << "]" << " threads_per_numa[" << s_thread_pool_size[1] << "]" << " threads_per_core[" << s_thread_pool_size[2] << "]"; - if (0 == s_threads_process.m_pool_base) { + if (nullptr == s_threads_process.m_pool_base) { s << " Asynchronous"; } s << " ReduceScratch[" << s_current_reduce_size << "]" @@ -547,19 +545,20 @@ void ThreadsExec::print_configuration(std::ostream &s, const bool detail) { //---------------------------------------------------------------------------- -int ThreadsExec::is_initialized() { return 0 != s_threads_exec[0]; } +int ThreadsExec::is_initialized() { return nullptr != s_threads_exec[0]; } void ThreadsExec::initialize(unsigned thread_count, unsigned use_numa_count, unsigned use_cores_per_numa, bool allow_asynchronous_threadpool) { - static const Sentinel sentinel; + // need to provide an initializer for Intel compilers + static const Sentinel sentinel = {}; const bool is_initialized = 0 != s_thread_pool_size[0]; unsigned thread_spawn_failed = 0; for (int i = 0; i < ThreadsExec::MAX_THREAD_COUNT; i++) - s_threads_exec[i] = NULL; + s_threads_exec[i] = nullptr; if (!is_initialized) { // If thread_count, use_numa_count, or use_cores_per_numa are zero @@ -611,7 +610,7 @@ void ThreadsExec::initialize(unsigned thread_count, unsigned use_numa_count, // Spawn thread executing the 'driver()' function. // Wait until spawned thread has attempted to initialize. - // If spawning and initialization is successfull then + // If spawning and initialization is successful then // an entry in 's_threads_exec' will be assigned. if (ThreadsExec::spawn()) { wait_yield(s_threads_process.m_pool_state, ThreadsExec::Inactive); @@ -631,15 +630,15 @@ void ThreadsExec::initialize(unsigned thread_count, unsigned use_numa_count, } } - s_current_function = 0; - s_current_function_arg = 0; + s_current_function = nullptr; + s_current_function_arg = nullptr; s_threads_process.m_pool_state = ThreadsExec::Inactive; memory_fence(); if (!thread_spawn_failed) { // Bind process to the core on which it was located before spawning - // occured + // occurred if (hwloc_can_bind) { Kokkos::hwloc::bind_this_thread(proc_coord); } @@ -659,7 +658,7 @@ void ThreadsExec::initialize(unsigned thread_count, unsigned use_numa_count, s_threads_process.m_pool_rank, s_threads_process.m_pool_size); s_threads_pid[s_threads_process.m_pool_rank] = pthread_self(); } else { - s_threads_process.m_pool_base = 0; + s_threads_process.m_pool_base = nullptr; s_threads_process.m_pool_rank = 0; s_threads_process.m_pool_size = 0; s_threads_process.m_pool_fan_size = 0; @@ -740,7 +739,7 @@ void ThreadsExec::finalize() { if (s_threads_process.m_pool_base) { (&s_threads_process)->~ThreadsExec(); - s_threads_exec[0] = 0; + s_threads_exec[0] = nullptr; } if (Kokkos::hwloc::can_bind_threads()) { @@ -754,7 +753,7 @@ void ThreadsExec::finalize() { // Reset master thread to run solo. s_threads_process.m_numa_rank = 0; s_threads_process.m_numa_core_rank = 0; - s_threads_process.m_pool_base = 0; + s_threads_process.m_pool_base = nullptr; s_threads_process.m_pool_rank = 0; s_threads_process.m_pool_size = 1; s_threads_process.m_pool_fan_size = 0; diff --git a/core/src/Threads/Kokkos_ThreadsExec_base.cpp b/core/src/Threads/Kokkos_ThreadsExec_base.cpp index ba86678f760..40a09ed22ab 100644 --- a/core/src/Threads/Kokkos_ThreadsExec_base.cpp +++ b/core/src/Threads/Kokkos_ThreadsExec_base.cpp @@ -52,7 +52,7 @@ #include #include -/* Standard C++ libaries */ +/* Standard C++ libraries */ #include #include @@ -87,7 +87,7 @@ void* internal_pthread_driver(void*) { std::cerr.flush(); std::abort(); } - return NULL; + return nullptr; } } // namespace @@ -105,7 +105,7 @@ bool ThreadsExec::spawn() { 0 == pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) { pthread_t pt; - result = 0 == pthread_create(&pt, &attr, internal_pthread_driver, 0); + result = 0 == pthread_create(&pt, &attr, internal_pthread_driver, nullptr); } pthread_attr_destroy(&attr); @@ -153,7 +153,7 @@ void ThreadsExec::wait_yield(volatile int& flag, const int value) { #include #include -/* Standard C++ libaries */ +/* Standard C++ libraries */ #include #include diff --git a/core/src/Threads/Kokkos_ThreadsTeam.hpp b/core/src/Threads/Kokkos_ThreadsTeam.hpp index 2cd9d73b2a4..fe1a1e8b08e 100644 --- a/core/src/Threads/Kokkos_ThreadsTeam.hpp +++ b/core/src/Threads/Kokkos_ThreadsTeam.hpp @@ -169,7 +169,10 @@ class ThreadsExecTeamMember { KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType& value, const int& thread_id) const { #if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - {} + { + (void)value; + (void)thread_id; + } #else // Make sure there is enough scratch space: typedef typename if_cscratch_memory()); + memory_fence(); + team_barrier(); if (team_rank() == thread_id) *local_value = value; memory_fence(); team_barrier(); @@ -189,7 +194,11 @@ class ThreadsExecTeamMember { KOKKOS_INLINE_FUNCTION void team_broadcast(Closure const& f, ValueType& value, const int& thread_id) const { #if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - {} + { + (void)f; + (void)value; + (void)thread_id; + } #else // Make sure there is enough scratch space: typedef typename if_cscratch_memory()); + memory_fence(); + team_barrier(); if (team_rank() == thread_id) *local_value = value; memory_fence(); team_barrier(); @@ -211,7 +222,7 @@ class ThreadsExecTeamMember { team_reduce(const Type& value) const #if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) { - return Type(); + return value; } #else { @@ -221,13 +232,15 @@ class ThreadsExecTeamMember { if (0 == m_exec) return value; - *((volatile type*)m_exec->scratch_memory()) = value; + if (team_rank() != team_size() - 1) + *((volatile type*)m_exec->scratch_memory()) = value; memory_fence(); type& accum = *((type*)m_team_base[0]->scratch_memory()); if (team_fan_in()) { + accum = value; for (int i = 1; i < m_team_size; ++i) { accum += *((type*)m_team_base[i]->scratch_memory()); } @@ -267,7 +280,7 @@ class ThreadsExecTeamMember { type* const local_value = ((type*)m_exec->scratch_memory()); // Set this thread's contribution - *local_value = contribution; + if (team_rank() != team_size() - 1) *local_value = contribution; // Fence to make sure the base team member has access: memory_fence(); @@ -277,6 +290,7 @@ class ThreadsExecTeamMember { // team_fan_out() type* const team_value = ((type*)m_team_base[0]->scratch_memory()); + *team_value = contribution; // Join to the team value: for (int i = 1; i < m_team_size; ++i) { reducer.join(*team_value, *((type*)m_team_base[i]->scratch_memory())); @@ -313,7 +327,8 @@ class ThreadsExecTeamMember { ArgType* const global_accum) const #if !defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) { - return ArgType(); + (void)global_accum; + return value; } #else { @@ -607,6 +622,11 @@ class TeamPolicyInternal typedef PolicyTraits traits; + const typename traits::execution_space& space() const { + static typename traits::execution_space m_space; + return m_space; + } + TeamPolicyInternal& operator=(const TeamPolicyInternal& p) { m_league_size = p.m_league_size; m_team_size = p.m_team_size; @@ -1167,7 +1187,8 @@ namespace Kokkos { template KOKKOS_INLINE_FUNCTION void single( - const Impl::VectorSingleStruct& single_struct, + const Impl::VectorSingleStruct< + Impl::ThreadsExecTeamMember>& /*single_struct*/, const FunctorType& lambda) { lambda(); } @@ -1181,7 +1202,8 @@ KOKKOS_INLINE_FUNCTION void single( template KOKKOS_INLINE_FUNCTION void single( - const Impl::VectorSingleStruct& single_struct, + const Impl::VectorSingleStruct< + Impl::ThreadsExecTeamMember>& /*single_struct*/, const FunctorType& lambda, ValueType& val) { lambda(val); } diff --git a/core/src/Threads/Kokkos_Threads_Parallel.hpp b/core/src/Threads/Kokkos_Threads_Parallel.hpp index f45830cb24c..fbc83e9a555 100644 --- a/core/src/Threads/Kokkos_Threads_Parallel.hpp +++ b/core/src/Threads/Kokkos_Threads_Parallel.hpp @@ -492,7 +492,7 @@ class ParallelReduce, ReducerType, const HostViewType &arg_result_view, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void *>::type = NULL) + void *>::type = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), @@ -650,7 +650,7 @@ class ParallelReduce, ReducerType, const HostViewType &arg_result_view, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void *>::type = NULL) + void *>::type = nullptr) : m_functor(arg_functor), m_mdr_policy(arg_policy), m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)), @@ -774,7 +774,7 @@ class ParallelReduce, const ViewType &arg_result, typename std::enable_if::value && !Kokkos::is_reducer_type::value, - void *>::type = NULL) + void *>::type = nullptr) : m_functor(arg_functor), m_policy(arg_policy), m_reducer(InvalidType()), diff --git a/core/src/Threads/Kokkos_Threads_WorkGraphPolicy.hpp b/core/src/Threads/Kokkos_Threads_WorkGraphPolicy.hpp index ec2b1d983fa..7bcd9aaee06 100644 --- a/core/src/Threads/Kokkos_Threads_WorkGraphPolicy.hpp +++ b/core/src/Threads/Kokkos_Threads_WorkGraphPolicy.hpp @@ -87,9 +87,10 @@ class ParallelFor, } } - static inline void thread_main(ThreadsExec&, const void* arg) noexcept { + static inline void thread_main(ThreadsExec& exec, const void* arg) noexcept { const Self& self = *(static_cast(arg)); self.exec_one_thread(); + exec.fan_in(); } public: diff --git a/core/src/impl/CMakeLists.txt b/core/src/impl/CMakeLists.txt index 2f6d8e78a3c..361a85b7381 100644 --- a/core/src/impl/CMakeLists.txt +++ b/core/src/impl/CMakeLists.txt @@ -9,7 +9,7 @@ TRIBITS_ADD_LIBRARY( kokkoscore_impl NOINSTALLHEADERS ${HEADERS} SOURCES ${SOURCES} - DEPLIBS + DEPLIBS ) SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) diff --git a/core/src/impl/Kokkos_AnalyzePolicy.hpp b/core/src/impl/Kokkos_AnalyzePolicy.hpp index ac866514334..739e2d4f462 100644 --- a/core/src/impl/Kokkos_AnalyzePolicy.hpp +++ b/core/src/impl/Kokkos_AnalyzePolicy.hpp @@ -177,11 +177,14 @@ struct AnalyzePolicy is_launch_bounds::value, SetLaunchBounds, typename std::conditional< - Experimental::is_work_item_property< - T>::value, + Kokkos::Experimental:: + is_work_item_property::value, SetWorkItemProperty, - SetWorkTag >::type>::type>:: - type>::type>::type>::type>::type::type, + typename std::conditional< + !std::is_void::value, + SetWorkTag, Base>::type>:: + type>::type>::type>::type>::type>::type>:: + type::type, Traits...> {}; template @@ -201,9 +204,9 @@ struct AnalyzePolicy { using index_type = typename std::conditional::value, IndexType, - typename Base::index_type>::type :: - type // nasty hack to make index_type into an integral_type - ; // instead of the wrapped IndexType for backwards compatibility + typename Base::index_type>::type::type; + // nasty hack to make index_type into an integral_type + // instead of the wrapped IndexType for backwards compatibility using iteration_pattern = typename std::conditional< is_void::value, diff --git a/core/src/impl/Kokkos_Atomic_Assembly.hpp b/core/src/impl/Kokkos_Atomic_Assembly.hpp index 786ffc902f6..a31dd1cf493 100644 --- a/core/src/impl/Kokkos_Atomic_Assembly.hpp +++ b/core/src/impl/Kokkos_Atomic_Assembly.hpp @@ -48,6 +48,8 @@ namespace Kokkos { namespace Impl { + +#if !defined(_WIN32) struct cas128_t { uint64_t lower; uint64_t upper; @@ -85,6 +87,7 @@ struct cas128_t { upper = a.upper; } } __attribute__((__aligned__(16))); +#endif #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) inline cas128_t cas128(volatile cas128_t* ptr, cas128_t cmp, cas128_t swap) { diff --git a/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp index 4fe72ad1426..c25b80a825b 100644 --- a/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp +++ b/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp @@ -92,8 +92,7 @@ __inline__ __device__ unsigned long long int atomic_compare_exchange( template __inline__ __device__ T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { const int tmp = atomicCAS((int*)dest, *((int*)&compare), *((int*)&val)); return *((T*)&tmp); } @@ -101,9 +100,9 @@ __inline__ __device__ T atomic_compare_exchange( template __inline__ __device__ T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if< - sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int), - const T&>::type val) { + typename std::enable_if::type val) { typedef unsigned long long int type; const type tmp = atomicCAS((type*)dest, *((type*)&compare), *((type*)&val)); return *((T*)&tmp); @@ -112,8 +111,8 @@ __inline__ __device__ T atomic_compare_exchange( template __inline__ __device__ T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), - const T>::type& val) { + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), + const T>::type& val) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; @@ -147,7 +146,7 @@ __inline__ __device__ T atomic_compare_exchange( //---------------------------------------------------------------------------- // GCC native CAS supports int, long, unsigned int, unsigned long. // Intel native CAS support int and long with the same interface as GCC. -#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS) +#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS) || !defined(KOKKOS_ENABLE_HIP_ATOMICS) #if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) #if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS) @@ -188,12 +187,11 @@ inline unsigned long atomic_compare_exchange(volatile unsigned long* const dest, template inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { union U { int i; T t; - KOKKOS_INLINE_FUNCTION U(){}; + KOKKOS_INLINE_FUNCTION U() {} } tmp; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -208,13 +206,13 @@ inline T atomic_compare_exchange( template inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if::type val) { + typename std::enable_if::type val) { union U { long i; T t; - KOKKOS_INLINE_FUNCTION U(){}; + KOKKOS_INLINE_FUNCTION U() {} } tmp; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -230,14 +228,14 @@ inline T atomic_compare_exchange( template inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if::type val) { + typename std::enable_if::type val) { union U { Impl::cas128_t i; T t; - KOKKOS_INLINE_FUNCTION U(){}; + KOKKOS_INLINE_FUNCTION U() {} } tmp; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -253,12 +251,12 @@ inline T atomic_compare_exchange( template inline T atomic_compare_exchange( volatile T* const dest, const T compare, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>::type& val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif diff --git a/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp b/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp index c2aad61975f..e3fd1c53db3 100644 --- a/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp +++ b/core/src/impl/Kokkos_Atomic_Compare_Exchange_Weak.hpp @@ -226,7 +226,7 @@ bool atomic_compare_exchange_weak( // GCC native CAS supports int, long, unsigned int, unsigned long. // Intel native CAS support int and long with the same interface as GCC. -#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS) +#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS) || !defined(KOKKOS_ENABLE_HIP_ATOMICS) #if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) #if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS) @@ -267,12 +267,11 @@ inline unsigned long atomic_compare_exchange(volatile unsigned long* const dest, template inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { union U { int i; T t; - KOKKOS_INLINE_FUNCTION U(){}; + KOKKOS_INLINE_FUNCTION U() {} } tmp; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -287,13 +286,13 @@ inline T atomic_compare_exchange( template inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if::type val) { + typename std::enable_if::type val) { union U { long i; T t; - KOKKOS_INLINE_FUNCTION U(){}; + KOKKOS_INLINE_FUNCTION U() {} } tmp; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -309,14 +308,14 @@ inline T atomic_compare_exchange( template inline T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if::type val) { + typename std::enable_if::type val) { union U { Impl::cas128_t i; T t; - KOKKOS_INLINE_FUNCTION U(){}; + KOKKOS_INLINE_FUNCTION U() {} } tmp; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -332,12 +331,12 @@ inline T atomic_compare_exchange( template inline T atomic_compare_exchange( volatile T* const dest, const T compare, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>::type& val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif diff --git a/core/src/impl/Kokkos_Atomic_Decrement.hpp b/core/src/impl/Kokkos_Atomic_Decrement.hpp index 992ae3c0052..47961b5c717 100644 --- a/core/src/impl/Kokkos_Atomic_Decrement.hpp +++ b/core/src/impl/Kokkos_Atomic_Decrement.hpp @@ -54,7 +54,7 @@ namespace Kokkos { -// Atomic increment +// Atomic decrement template <> KOKKOS_INLINE_FUNCTION void atomic_decrement(volatile char* a) { #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) && \ diff --git a/core/src/impl/Kokkos_Atomic_Exchange.hpp b/core/src/impl/Kokkos_Atomic_Exchange.hpp index 37aebf9a3d3..4a9a786df4d 100644 --- a/core/src/impl/Kokkos_Atomic_Exchange.hpp +++ b/core/src/impl/Kokkos_Atomic_Exchange.hpp @@ -84,8 +84,7 @@ __inline__ __device__ unsigned long long int atomic_exchange( template __inline__ __device__ T atomic_exchange( volatile T* const dest, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { // int tmp = __ullAtomicExch( (int*) dest , *((int*)&val) ); #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); @@ -98,9 +97,9 @@ __inline__ __device__ T atomic_exchange( template __inline__ __device__ T atomic_exchange( volatile T* const dest, - typename Kokkos::Impl::enable_if< - sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int), - const T&>::type val) { + typename std::enable_if::type val) { typedef unsigned long long int type; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -113,10 +112,10 @@ __inline__ __device__ T atomic_exchange( } template -__inline__ __device__ T atomic_exchange( - volatile T* const dest, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), - const T>::type& val) { +__inline__ __device__ T +atomic_exchange(volatile T* const dest, + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), + const T>::type& val) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -152,8 +151,7 @@ __inline__ __device__ T atomic_exchange( template __inline__ __device__ void atomic_assign( volatile T* const dest, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { // (void) __ullAtomicExch( (int*) dest , *((int*)&val) ); (void)atomicExch(((int*)dest), *((int*)&val)); } @@ -161,9 +159,9 @@ __inline__ __device__ void atomic_assign( template __inline__ __device__ void atomic_assign( volatile T* const dest, - typename Kokkos::Impl::enable_if< - sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int), - const T&>::type val) { + typename std::enable_if::type val) { typedef unsigned long long int type; // (void) __ullAtomicExch( (type*) dest , *((type*)&val) ); (void)atomicExch(((type*)dest), *((type*)&val)); @@ -172,9 +170,9 @@ __inline__ __device__ void atomic_assign( template __inline__ __device__ void atomic_assign( volatile T* const dest, - typename Kokkos::Impl::enable_if< - sizeof(T) != sizeof(int) && sizeof(T) != sizeof(unsigned long long int), - const T&>::type val) { + typename std::enable_if::type val) { (void)atomic_exchange(dest, val); } @@ -187,11 +185,10 @@ __inline__ __device__ void atomic_assign( #if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS) template -inline T atomic_exchange( - volatile T* const dest, - typename Kokkos::Impl::enable_if::type val) { +inline T atomic_exchange(volatile T* const dest, + typename std::enable_if::type val) { typedef typename Kokkos::Impl::if_c::type type; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -205,7 +202,7 @@ inline T atomic_exchange( union U { T val_T; type val_type; - inline U(){}; + inline U() {} } old; old.val_T = *dest; @@ -223,8 +220,8 @@ inline T atomic_exchange( template inline T atomic_exchange( volatile T* const dest, - typename Kokkos::Impl::enable_if::type val) { + typename std::enable_if::type + val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif @@ -232,7 +229,7 @@ inline T atomic_exchange( union U { Impl::cas128_t i; T t; - inline U(){}; + inline U() {} } assume, oldval, newval; oldval.t = *dest; @@ -252,12 +249,12 @@ inline T atomic_exchange( template inline T atomic_exchange( volatile T* const dest, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>::type& val) { while (!Impl::lock_address_host_space((void*)dest)) ; T return_val = *dest; @@ -280,11 +277,10 @@ inline T atomic_exchange( } template -inline void atomic_assign( - volatile T* const dest, - typename Kokkos::Impl::enable_if::type val) { +inline void atomic_assign(volatile T* const dest, + typename std::enable_if::type val) { typedef typename Kokkos::Impl::if_c::type type; @@ -299,7 +295,7 @@ inline void atomic_assign( union U { T val_T; type val_type; - inline U(){}; + inline U() {} } old; old.val_T = *dest; @@ -315,8 +311,8 @@ inline void atomic_assign( template inline void atomic_assign( volatile T* const dest, - typename Kokkos::Impl::enable_if::type val) { + typename std::enable_if::type + val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif @@ -324,7 +320,7 @@ inline void atomic_assign( union U { Impl::cas128_t i; T t; - inline U(){}; + inline U() {} } assume, oldval, newval; oldval.t = *dest; @@ -339,12 +335,12 @@ inline void atomic_assign( template inline void atomic_assign( volatile T* const dest, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>::type& val) { while (!Impl::lock_address_host_space((void*)dest)) ; // This is likely an aggregate type with a defined diff --git a/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp index 58277740da1..0a6900f8409 100644 --- a/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp +++ b/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp @@ -94,14 +94,13 @@ __inline__ __device__ double atomic_fetch_add(volatile double* const dest, template __inline__ __device__ T atomic_fetch_add( volatile T* const dest, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { // to work around a bug in the clang cuda compiler, the name here needs to be // different from the one internal to the other overloads union U1 { int i; T t; - KOKKOS_INLINE_FUNCTION U1(){}; + KOKKOS_INLINE_FUNCTION U1() {} } assume, oldval, newval; oldval.t = *dest; @@ -118,15 +117,15 @@ __inline__ __device__ T atomic_fetch_add( template __inline__ __device__ T atomic_fetch_add( volatile T* const dest, - typename Kokkos::Impl::enable_if< - sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int), - const T>::type val) { + typename std::enable_if::type val) { // to work around a bug in the clang cuda compiler, the name here needs to be // different from the one internal to the other overloads union U2 { unsigned long long int i; T t; - KOKKOS_INLINE_FUNCTION U2(){}; + KOKKOS_INLINE_FUNCTION U2() {} } assume, oldval, newval; oldval.t = *dest; @@ -143,10 +142,10 @@ __inline__ __device__ T atomic_fetch_add( //---------------------------------------------------------------------------- template -__inline__ __device__ T atomic_fetch_add( - volatile T* const dest, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), - const T>::type& val) { +__inline__ __device__ T +atomic_fetch_add(volatile T* const dest, + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), + const T>::type& val) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; @@ -179,7 +178,7 @@ __inline__ __device__ T atomic_fetch_add( #endif #endif //---------------------------------------------------------------------------- -#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS) +#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS) || !defined(KOKKOS_ENABLE_HIP_ATOMICS) #if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) #if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS) @@ -239,12 +238,11 @@ inline unsigned long int atomic_fetch_add( template inline T atomic_fetch_add( volatile T* const dest, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { union U { int i; T t; - inline U(){}; + inline U() {} } assume, oldval, newval; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -263,15 +261,14 @@ inline T atomic_fetch_add( } template -inline T atomic_fetch_add( - volatile T* const dest, - typename Kokkos::Impl::enable_if::type val) { +inline T atomic_fetch_add(volatile T* const dest, + typename std::enable_if::type val) { union U { long i; T t; - inline U(){}; + inline U() {} } assume, oldval, newval; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -293,14 +290,14 @@ inline T atomic_fetch_add( template inline T atomic_fetch_add( volatile T* const dest, - typename Kokkos::Impl::enable_if::type val) { + typename std::enable_if::type val) { union U { Impl::cas128_t i; T t; - inline U(){}; + inline U() {} } assume, oldval, newval; #if defined(KOKKOS_ENABLE_RFO_PREFETCH) @@ -324,12 +321,12 @@ inline T atomic_fetch_add( template inline T atomic_fetch_add( volatile T* const dest, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>::type& val) { while (!Impl::lock_address_host_space((void*)dest)) ; T return_val = *dest; @@ -390,11 +387,5 @@ __inline__ __device__ T atomic_fetch_add(volatile T* const, } #endif -// Simpler version of atomic_fetch_add without the fetch -template -KOKKOS_INLINE_FUNCTION void atomic_add(volatile T* const dest, const T src) { - atomic_fetch_add(dest, src); -} - } // namespace Kokkos #endif diff --git a/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp index 91da9fdcaa4..c14749f1b73 100644 --- a/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp +++ b/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp @@ -93,8 +93,7 @@ __inline__ __device__ unsigned int atomic_fetch_sub(volatile double* const dest, template __inline__ __device__ T atomic_fetch_sub( volatile T* const dest, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { union U { int i; T t; @@ -115,9 +114,9 @@ __inline__ __device__ T atomic_fetch_sub( template __inline__ __device__ T atomic_fetch_sub( volatile T* const dest, - typename Kokkos::Impl::enable_if< - sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int), - const T>::type val) { + typename std::enable_if::type val) { union U { unsigned long long int i; T t; @@ -138,10 +137,10 @@ __inline__ __device__ T atomic_fetch_sub( //---------------------------------------------------------------------------- template -__inline__ __device__ T atomic_fetch_sub( - volatile T* const dest, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), - const T>::type& val) { +__inline__ __device__ T +atomic_fetch_sub(volatile T* const dest, + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), + const T>::type& val) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; @@ -172,7 +171,7 @@ __inline__ __device__ T atomic_fetch_sub( #endif #endif //---------------------------------------------------------------------------- -#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS) +#if !defined(KOKKOS_ENABLE_ROCM_ATOMICS) || !defined(KOKKOS_ENABLE_HIP_ATOMICS) #if !defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) #if defined(KOKKOS_ENABLE_GNU_ATOMICS) || defined(KOKKOS_ENABLE_INTEL_ATOMICS) @@ -214,8 +213,7 @@ inline unsigned long int atomic_fetch_sub( template inline T atomic_fetch_sub( volatile T* const dest, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { union U { int i; T t; @@ -238,11 +236,10 @@ inline T atomic_fetch_sub( } template -inline T atomic_fetch_sub( - volatile T* const dest, - typename Kokkos::Impl::enable_if::type val) { +inline T atomic_fetch_sub(volatile T* const dest, + typename std::enable_if::type val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif @@ -269,8 +266,8 @@ inline T atomic_fetch_sub( template inline T atomic_fetch_sub( volatile T* const dest, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), - const T>::type& val) { + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), + const T>::type& val) { #if defined(KOKKOS_ENABLE_RFO_PREFETCH) _mm_prefetch((const char*)dest, _MM_HINT_ET0); #endif @@ -321,12 +318,6 @@ __inline__ __device__ T atomic_fetch_sub(volatile T* const, } #endif -// Simpler version of atomic_fetch_sub without the fetch -template -KOKKOS_INLINE_FUNCTION void atomic_sub(volatile T* const dest, const T src) { - atomic_fetch_sub(dest, src); -} - } // namespace Kokkos #include diff --git a/core/src/impl/Kokkos_Atomic_Generic.hpp b/core/src/impl/Kokkos_Atomic_Generic.hpp index 160a4e79218..49ee86b2c45 100644 --- a/core/src/impl/Kokkos_Atomic_Generic.hpp +++ b/core/src/impl/Kokkos_Atomic_Generic.hpp @@ -155,9 +155,9 @@ struct RShiftOper { template KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( const Oper& op, volatile T* const dest, - typename Kokkos::Impl::enable_if< - sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int), - const T>::type val) { + typename std::enable_if::type val) { union U { unsigned long long int i; T t; @@ -179,9 +179,9 @@ KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( template KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( const Oper& op, volatile T* const dest, - typename Kokkos::Impl::enable_if< - sizeof(T) != sizeof(int) && sizeof(T) == sizeof(unsigned long long int), - const T>::type val) { + typename std::enable_if::type val) { union U { unsigned long long int i; T t; @@ -192,7 +192,7 @@ KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( do { assume.i = oldval.i; - newval.t = Oper::apply(assume.t, val); + newval.t = op.apply(assume.t, val); oldval.i = Kokkos::atomic_compare_exchange((unsigned long long int*)dest, assume.i, newval.i); } while (assume.i != oldval.i); @@ -203,8 +203,7 @@ KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( template KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( const Oper& op, volatile T* const dest, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { union U { int i; T t; @@ -225,8 +224,7 @@ KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( template KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( const Oper& op, volatile T* const dest, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { union U { int i; T t; @@ -237,7 +235,7 @@ KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( do { assume.i = oldval.i; - newval.t = Oper::apply(assume.t, val); + newval.t = op.apply(assume.t, val); oldval.i = Kokkos::atomic_compare_exchange((int*)dest, assume.i, newval.i); } while (assume.i != oldval.i); @@ -247,13 +245,13 @@ KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( template KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( const Oper& op, volatile T* const dest, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), - const T>::type val) { + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8), const T>::type + val) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST while (!Impl::lock_address_host_space((void*)dest)) ; T return_val = *dest; - *dest = Oper::apply(return_val, val); + *dest = op.apply(return_val, val); Impl::unlock_address_host_space((void*)dest); return return_val; #elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) @@ -271,8 +269,7 @@ KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( if (!done) { if (Impl::lock_address_cuda_space((void*)dest)) { return_val = *dest; - *dest = Oper::apply(return_val, val); - ; + *dest = op.apply(return_val, val); Impl::unlock_address_cuda_space((void*)dest); done = 1; } @@ -284,24 +281,44 @@ KOKKOS_INLINE_FUNCTION T atomic_fetch_oper( #endif } return return_val; +#elif defined(__HIP_DEVICE_COMPILE__) + // FIXME_HIP + Kokkos::abort("atomic_fetch_oper not implemented for large types."); + T return_val = *dest; + int done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active != done_active) { + if (!done) { + // if (Impl::lock_address_hip_space((void*)dest)) + { + return_val = *dest; + *dest = op.apply(return_val, val); + // Impl::unlock_address_hip_space((void*)dest); + done = 1; + } + } + done_active = __ballot(done); + } + return return_val; #endif } template -KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( - const Oper& op, volatile T* const dest, - typename Kokkos::Impl::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) +KOKKOS_INLINE_FUNCTION T +atomic_oper_fetch(const Oper& op, volatile T* const dest, + typename std::enable_if<(sizeof(T) != 4) && (sizeof(T) != 8) #if defined(KOKKOS_ENABLE_ASM) && \ defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - && (sizeof(T) != 16) + && (sizeof(T) != 16) #endif - , - const T>::type& val) { + , + const T>::type& val) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST while (!Impl::lock_address_host_space((void*)dest)) ; - T return_val = Oper::apply(*dest, val); + T return_val = op.apply(*dest, val); *dest = return_val; Impl::unlock_address_host_space((void*)dest); return return_val; @@ -319,7 +336,7 @@ KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( while (active != done_active) { if (!done) { if (Impl::lock_address_cuda_space((void*)dest)) { - return_val = Oper::apply(*dest, val); + return_val = op.apply(*dest, val); *dest = return_val; Impl::unlock_address_cuda_space((void*)dest); done = 1; @@ -332,6 +349,26 @@ KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( #endif } return return_val; +#elif defined(__HIP_DEVICE_COMPILE__) + // FIXME_HIP + Kokkos::abort("atomic_oper_fetch not implemented for large types."); + T return_val; + int done = 0; + unsigned int active = __ballot(1); + unsigned int done_active = 0; + while (active != done_active) { + if (!done) { + // if (Impl::lock_address_hip_space((void*)dest)) + { + return_val = op.apply(*dest, val); + *dest = return_val; + // Impl::unlock_address_hip_space((void*)dest); + done = 1; + } + } + done_active = __ballot(done); + } + return return_val; #endif } @@ -454,5 +491,27 @@ KOKKOS_INLINE_FUNCTION T atomic_rshift_fetch(volatile T* const dest, dest, val); } +#ifdef _WIN32 +template +KOKKOS_INLINE_FUNCTION T atomic_add_fetch(volatile T* const dest, const T val) { + return Impl::atomic_oper_fetch(Impl::AddOper(), dest, val); +} + +template +KOKKOS_INLINE_FUNCTION T atomic_sub_fetch(volatile T* const dest, const T val) { + return Impl::atomic_oper_fetch(Impl::SubOper(), dest, val); +} + +template +KOKKOS_INLINE_FUNCTION T atomic_fetch_add(volatile T* const dest, const T val) { + return Impl::atomic_fetch_oper(Impl::AddOper(), dest, val); +} + +template +KOKKOS_INLINE_FUNCTION T atomic_fetch_sub(volatile T* const dest, const T val) { + return Impl::atomic_fetch_oper(Impl::SubOper(), dest, val); +} +#endif + } // namespace Kokkos #endif diff --git a/core/src/impl/Kokkos_Atomic_Generic_Secondary.hpp b/core/src/impl/Kokkos_Atomic_Generic_Secondary.hpp new file mode 100644 index 00000000000..9d0172b6537 --- /dev/null +++ b/core/src/impl/Kokkos_Atomic_Generic_Secondary.hpp @@ -0,0 +1,76 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined(KOKKOS_ATOMIC_HPP) && !defined(KOKKOS_ATOMIC_GENERIC_SECONDARY_HPP) +#define KOKKOS_ATOMIC_GENERIC_SECONDARY_HPP +#include + +namespace Kokkos { + +#ifndef KOKKOS_ENABLE_SERIAL_ATOMICS +template +KOKKOS_INLINE_FUNCTION T atomic_exchange(volatile T* const dest, const T val) { + T oldval = *dest; + T assume; + do { + assume = oldval; + oldval = atomic_compare_exchange(dest, assume, val); + } while (assume != oldval); + + return oldval; +} +#endif + +template +KOKKOS_INLINE_FUNCTION void atomic_add(volatile T* const dest, const T val) { + (void)atomic_fetch_add(dest, val); +} + +template +KOKKOS_INLINE_FUNCTION void atomic_sub(volatile T* const dest, const T val) { + (void)atomic_fetch_sub(dest, val); +} + +} // namespace Kokkos +#endif diff --git a/core/src/impl/Kokkos_Atomic_Increment.hpp b/core/src/impl/Kokkos_Atomic_Increment.hpp index f0ffebef9a2..65630aa84cd 100644 --- a/core/src/impl/Kokkos_Atomic_Increment.hpp +++ b/core/src/impl/Kokkos_Atomic_Increment.hpp @@ -91,6 +91,7 @@ KOKKOS_INLINE_FUNCTION void atomic_increment(volatile short* a) { #endif } +#ifndef _WIN32 template <> KOKKOS_INLINE_FUNCTION void atomic_increment(volatile int* a) { #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) && \ @@ -109,6 +110,7 @@ KOKKOS_INLINE_FUNCTION void atomic_increment(volatile int* a) { Kokkos::atomic_fetch_add(a, int(1)); #endif } +#endif template <> KOKKOS_INLINE_FUNCTION void atomic_increment( diff --git a/core/src/impl/Kokkos_Atomic_Load.hpp b/core/src/impl/Kokkos_Atomic_Load.hpp index 6b4c82b6203..f3b77a29762 100644 --- a/core/src/impl/Kokkos_Atomic_Load.hpp +++ b/core/src/impl/Kokkos_Atomic_Load.hpp @@ -125,8 +125,7 @@ __device__ __inline__ T _relaxed_atomic_load_impl( void const**>::type = nullptr) { T rv{}; // TODO remove a copy operation here? - Kokkos::Impl::atomic_oper_fetch(NoOpOper{}, &rv, rv); - return rv; + return Kokkos::Impl::atomic_oper_fetch(NoOpOper{}, ptr, rv); } template @@ -168,6 +167,14 @@ inline T _atomic_load(T* ptr, MemoryOrder) { return *ptr; } +#elif defined(KOKKOS_ENABLE_WINDOWS_ATOMICS) + +template +inline T _atomic_load(T* ptr, MemoryOrder) { + atomic_compare_exchange(ptr, 0, 0); + return *ptr; +} + #endif // end of all atomic implementations template @@ -189,7 +196,7 @@ KOKKOS_FORCEINLINE_FUNCTION T atomic_load(T* ptr, } template -KOKKOS_FORCEINLINE_FUNCTION T atomic_load(T* ptr, +KOKKOS_FORCEINLINE_FUNCTION T atomic_load(T* /*ptr*/, Impl::memory_order_release_t) { static_assert( sizeof(T) == 0, // just something that will always be false, but only on @@ -198,7 +205,7 @@ KOKKOS_FORCEINLINE_FUNCTION T atomic_load(T* ptr, } template -KOKKOS_FORCEINLINE_FUNCTION T atomic_load(T* ptr, +KOKKOS_FORCEINLINE_FUNCTION T atomic_load(T* /*ptr*/, Impl::memory_order_acq_rel_t) { static_assert( sizeof(T) == 0, // just something that will always be false, but only on diff --git a/core/src/impl/Kokkos_Atomic_MinMax.hpp b/core/src/impl/Kokkos_Atomic_MinMax.hpp new file mode 100644 index 00000000000..8a886d0a775 --- /dev/null +++ b/core/src/impl/Kokkos_Atomic_MinMax.hpp @@ -0,0 +1,223 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#if defined(KOKKOS_ATOMIC_HPP) && !defined(KOKKOS_ATOMIC_MINMAX_HPP) +#define KOKKOS_ATOMIC_MINMAX_HPP + +namespace Kokkos { + +//---------------------------------------------------------------------------- + +#if defined(KOKKOS_ENABLE_CUDA) +#if defined(__CUDA_ARCH__) || defined(KOKKOS_IMPL_CUDA_CLANG_WORKAROUND) + +// Support for int, unsigned int, unsigned long long int, and float + +// Atomic_fetch_{min,max} + +#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND + +// Host implementations for CLANG compiler + +inline __host__ int atomic_fetch_min(volatile int* const dest, const int val) { + return Impl::atomic_fetch_oper(Impl::MinOper(), dest, + val); +} + +inline __host__ unsigned int atomic_fetch_min(volatile unsigned int* const dest, + const unsigned int val) { + return Impl::atomic_fetch_oper( + Impl::MinOper(), dest, val); +} + +inline __host__ unsigned long long int atomic_fetch_min( + volatile unsigned long long int* const dest, + const unsigned long long int val) { + return Impl::atomic_fetch_oper(Impl::MinOper(), + dest, val); +} + +inline __host__ int atomic_fetch_max(volatile int* const dest, const int val) { + return Impl::atomic_fetch_oper(Impl::MaxOper(), dest, + val); +} + +inline __host__ unsigned int atomic_fetch_max(volatile unsigned int* const dest, + const unsigned int val) { + return Impl::atomic_fetch_oper( + Impl::MaxOper(), dest, val); +} + +inline __host__ unsigned long long int atomic_fetch_max( + volatile unsigned long long int* const dest, + const unsigned long long int val) { + return Impl::atomic_fetch_oper(Impl::MaxOper(), + dest, val); +} + +#endif + +inline __device__ int atomic_fetch_min(volatile int* const dest, + const int val) { + return atomicMin((int*)dest, val); +} + +inline __device__ unsigned int atomic_fetch_min( + volatile unsigned int* const dest, const unsigned int val) { + return atomicMin((unsigned int*)dest, val); +} + +inline __device__ unsigned long long int atomic_fetch_min( + volatile unsigned long long int* const dest, + const unsigned long long int val) { + return atomicMin((unsigned long long int*)dest, val); +} + +inline __device__ int atomic_fetch_max(volatile int* const dest, + const int val) { + return atomicMax((int*)dest, val); +} + +inline __device__ unsigned int atomic_fetch_max( + volatile unsigned int* const dest, const unsigned int val) { + return atomicMax((unsigned int*)dest, val); +} + +inline __device__ unsigned long long int atomic_fetch_max( + volatile unsigned long long int* const dest, + const unsigned long long int val) { + return atomicMax((unsigned long long int*)dest, val); +} + +// Atomic_{min,max}_fetch + +#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND + +// Host implementations for CLANG compiler + +inline __host__ int atomic_min_fetch(volatile int* const dest, const int val) { + return Impl::atomic_oper_fetch(Impl::MinOper(), dest, + val); +} + +inline __host__ unsigned int atomic_min_fetch(volatile unsigned int* const dest, + const unsigned int val) { + return Impl::atomic_oper_fetch( + Impl::MinOper(), dest, val); +} + +inline __host__ unsigned long long int atomic_min_fetch( + volatile unsigned long long int* const dest, + const unsigned long long int val) { + return Impl::atomic_oper_fetch(Impl::MinOper(), + dest, val); +} + +inline __host__ int atomic_max_fetch(volatile int* const dest, const int val) { + return Impl::atomic_oper_fetch(Impl::MaxOper(), dest, + val); +} + +inline __host__ unsigned int atomic_max_fetch(volatile unsigned int* const dest, + const unsigned int val) { + return Impl::atomic_oper_fetch( + Impl::MaxOper(), dest, val); +} + +inline __host__ unsigned long long int atomic_max_fetch( + volatile unsigned long long int* const dest, + const unsigned long long int val) { + return Impl::atomic_oper_fetch(Impl::MaxOper(), + dest, val); +} +#endif + +inline __device__ int atomic_min_fetch(volatile int* const dest, + const int val) { + const int old = atomicMin((int*)dest, val); + return old < val ? old : val; +} + +inline __device__ unsigned int atomic_min_fetch( + volatile unsigned int* const dest, const unsigned int val) { + const unsigned int old = atomicMin((unsigned int*)dest, val); + return old < val ? old : val; +} + +inline __device__ unsigned long long int atomic_min_fetch( + volatile unsigned long long int* const dest, + const unsigned long long int val) { + const unsigned long long old = atomicMin((unsigned long long*)dest, val); + return old < val ? old : val; +} + +inline __device__ int atomic_max_fetch(volatile int* const dest, + const int val) { + const int old = atomicMax((int*)dest, val); + return old >= val ? old : val; +} + +inline __device__ unsigned int atomic_max_fetch( + volatile unsigned int* const dest, const unsigned int val) { + const unsigned int old = atomicMax((unsigned int*)dest, val); + return old >= val ? old : val; +} + +inline __device__ unsigned long long int atomic_max_fetch( + volatile unsigned long long int* const dest, + const unsigned long long int val) { + const unsigned long long old = atomicMax((unsigned long long*)dest, val); + return old >= val ? old : val; +} + +#endif +#endif +} // namespace Kokkos + +#endif diff --git a/core/src/impl/Kokkos_Atomic_Store.hpp b/core/src/impl/Kokkos_Atomic_Store.hpp index 6cd246e3ae9..264d6beaf5d 100644 --- a/core/src/impl/Kokkos_Atomic_Store.hpp +++ b/core/src/impl/Kokkos_Atomic_Store.hpp @@ -164,6 +164,13 @@ inline void _atomic_store(T* ptr, T val, MemoryOrder) { *ptr = val; } +#elif defined(KOKKOS_ENABLE_WINDOWS_ATOMICS) + +template +inline void _atomic_store(T* ptr, T val, MemoryOrder) { + atomic_exchange(ptr, val); +} + #endif // end of all atomic implementations template @@ -185,7 +192,7 @@ KOKKOS_FORCEINLINE_FUNCTION void atomic_store(T* ptr, T val, } template -KOKKOS_FORCEINLINE_FUNCTION void atomic_store(T* ptr, T val, +KOKKOS_FORCEINLINE_FUNCTION void atomic_store(T* /*ptr*/, T /*val*/, Impl::memory_order_acquire_t) { static_assert( sizeof(T) == 0, // just something that will always be false, but only on @@ -194,7 +201,7 @@ KOKKOS_FORCEINLINE_FUNCTION void atomic_store(T* ptr, T val, } template -KOKKOS_FORCEINLINE_FUNCTION void atomic_store(T* ptr, T val, +KOKKOS_FORCEINLINE_FUNCTION void atomic_store(T* /*ptr*/, T /*val*/, Impl::memory_order_acq_rel_t) { static_assert( sizeof(T) == 0, // just something that will always be false, but only on @@ -205,7 +212,7 @@ KOKKOS_FORCEINLINE_FUNCTION void atomic_store(T* ptr, T val, template KOKKOS_FORCEINLINE_FUNCTION void atomic_store(T* ptr, T val) { // relaxed by default! - _atomic_store(ptr, Impl::memory_order_relaxed); + _atomic_store(ptr, val, Impl::memory_order_relaxed); } } // end namespace Impl diff --git a/core/src/impl/Kokkos_Atomic_View.hpp b/core/src/impl/Kokkos_Atomic_View.hpp index bfe9bcd0b33..c3719bed229 100644 --- a/core/src/impl/Kokkos_Atomic_View.hpp +++ b/core/src/impl/Kokkos_Atomic_View.hpp @@ -347,7 +347,7 @@ class AtomicViewDataHandle { typename ViewTraits::value_type* ptr; KOKKOS_INLINE_FUNCTION - AtomicViewDataHandle() : ptr(NULL) {} + AtomicViewDataHandle() : ptr(nullptr) {} KOKKOS_INLINE_FUNCTION AtomicViewDataHandle(typename ViewTraits::value_type* ptr_) : ptr(ptr_) {} diff --git a/core/src/impl/Kokkos_Atomic_Windows.hpp b/core/src/impl/Kokkos_Atomic_Windows.hpp index 25230982ec7..c5d3466c6c1 100644 --- a/core/src/impl/Kokkos_Atomic_Windows.hpp +++ b/core/src/impl/Kokkos_Atomic_Windows.hpp @@ -52,6 +52,8 @@ #include #include +#undef VOID + namespace Kokkos { namespace Impl { #ifdef _MSC_VER @@ -74,14 +76,43 @@ __attribute__((aligned(16))) template KOKKOS_INLINE_FUNCTION T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if::type - val) { + typename std::enable_if::type val) { union U { - LONG i; + CHAR i; + T t; + KOKKOS_INLINE_FUNCTION U(){}; + } tmp; + + tmp.i = _InterlockedCompareExchange8((CHAR*)dest, *((CHAR*)&val), + *((CHAR*)&compare)); + return tmp.t; +} + +template +KOKKOS_INLINE_FUNCTION T atomic_compare_exchange( + volatile T* const dest, const T& compare, + typename std::enable_if::type val) { + union U { + SHORT i; T t; KOKKOS_INLINE_FUNCTION U(){}; } tmp; + tmp.i = _InterlockedCompareExchange16((SHORT*)dest, *((SHORT*)&val), + *((SHORT*)&compare)); + return tmp.t; +} + +template +KOKKOS_INLINE_FUNCTION T atomic_compare_exchange( + volatile T* const dest, const T& compare, + typename std::enable_if::type val) { + union U { + LONG i; + T t; + KOKKOS_INLINE_FUNCTION U() {} + } tmp; + tmp.i = _InterlockedCompareExchange((LONG*)dest, *((LONG*)&val), *((LONG*)&compare)); return tmp.t; @@ -90,12 +121,12 @@ KOKKOS_INLINE_FUNCTION T atomic_compare_exchange( template KOKKOS_INLINE_FUNCTION T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if::type val) { + typename std::enable_if::type + val) { union U { LONGLONG i; T t; - KOKKOS_INLINE_FUNCTION U(){}; + KOKKOS_INLINE_FUNCTION U() {} } tmp; tmp.i = _InterlockedCompareExchange64((LONGLONG*)dest, *((LONGLONG*)&val), @@ -106,18 +137,19 @@ KOKKOS_INLINE_FUNCTION T atomic_compare_exchange( template KOKKOS_INLINE_FUNCTION T atomic_compare_exchange( volatile T* const dest, const T& compare, - typename Kokkos::Impl::enable_if::type val) { + typename std::enable_if::type + val) { + T compare_and_result(compare); union U { Impl::cas128_t i; T t; KOKKOS_INLINE_FUNCTION U(){}; - } tmp, newval; + } newval; newval.t = val; _InterlockedCompareExchange128((LONGLONG*)dest, newval.i.upper, - newval.i.lower, ((LONGLONG*)&compare)); - tmp.t = dest; - return tmp.t; + newval.i.lower, + ((LONGLONG*)&compare_and_result)); + return compare_and_result; } template @@ -127,117 +159,6 @@ KOKKOS_INLINE_FUNCTION T atomic_compare_exchange_strong(volatile T* const dest, return atomic_compare_exchange(dest, compare, val); } -template -T atomic_fetch_or(volatile T* const dest, const T val) { - T oldval = *dest; - T assume; - do { - assume = oldval; - T newval = val | oldval; - oldval = atomic_compare_exchange(dest, assume, newval); - } while (assume != oldval); - - return oldval; -} - -template -T atomic_fetch_and(volatile T* const dest, const T val) { - T oldval = *dest; - T assume; - do { - assume = oldval; - T newval = val & oldval; - oldval = atomic_compare_exchange(dest, assume, newval); - } while (assume != oldval); - - return oldval; -} - -template -T atomic_fetch_add(volatile T* const dest, const T val) { - T oldval = *dest; - T assume; - do { - assume = oldval; - T newval = val + oldval; - oldval = atomic_compare_exchange(dest, assume, newval); - } while (assume != oldval); - - return oldval; -} - -template -T atomic_fetch_sub(volatile T* const dest, const T val) { - T oldval = *dest; - T assume; - do { - assume = oldval; - T newval = val - oldval; - oldval = atomic_compare_exchange(dest, assume, newval); - } while (assume != oldval); - - return oldval; -} - -template -T atomic_exchange(volatile T* const dest, const T val) { - T oldval = *dest; - T assume; - do { - assume = oldval; - oldval = atomic_compare_exchange(dest, assume, val); - } while (assume != oldval); - - return oldval; -} - -template -void atomic_or(volatile T* const dest, const T val) { - atomic_fetch_or(dest, val); -} - -template -void atomic_and(volatile T* const dest, const T val) { - atomic_fetch_and(dest, val); -} - -template -void atomic_add(volatile T* const dest, const T val) { - atomic_fetch_add(dest, val); -} - -template -void atomic_sub(volatile T* const dest, const T val) { - atomic_fetch_sub(dest, val); -} - -template -void atomic_assign(volatile T* const dest, const T val) { - atomic_fetch_exchange(dest, val); -} - -template -T atomic_increment(volatile T* const dest) { - T oldval = *dest; - T assume; - do { - assume = oldval; - T newval = assume++; - oldval = atomic_compare_exchange(dest, assume, newval); - } while (assume != oldval); -} - -template -T atomic_decrement(volatile T* const dest) { - T oldval = *dest; - T assume; - do { - assume = oldval; - T newval = assume--; - oldval = atomic_compare_exchange(dest, assume, newval); - } while (assume != oldval); -} - } // namespace Kokkos #endif #endif diff --git a/core/src/impl/Kokkos_ChaseLev.hpp b/core/src/impl/Kokkos_ChaseLev.hpp index bff0ed8315a..e2283f11fd2 100644 --- a/core/src/impl/Kokkos_ChaseLev.hpp +++ b/core/src/impl/Kokkos_ChaseLev.hpp @@ -48,8 +48,7 @@ #define KOKKOS_IMPL_LOCKFREEDEQUE_HPP #include -#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using - // CUDA +#ifdef KOKKOS_ENABLE_TASKDAG #include @@ -191,11 +190,21 @@ struct ChaseLevDeque { return_value = *static_cast(a[b]); // relaxed load if (t == b) { /* single last element in the queue. */ - if (not Impl::atomic_compare_exchange_strong( +#ifdef _WIN32 + Kokkos::memory_fence(); + bool const success = + Kokkos::atomic_compare_exchange_strong(&m_top, t, t + 1); + Kokkos::memory_fence(); + if (!success) { + return_value = nullptr; + } +#else + if (!Impl::atomic_compare_exchange_strong( &m_top, t, t + 1, memory_order_seq_cst, memory_order_relaxed)) { /* failed race, someone else stole it */ return_value = nullptr; } +#endif m_bottom = b + 1; // memory order relaxed } } else { @@ -241,10 +250,20 @@ struct ChaseLevDeque { Kokkos::load_fence(); // TODO @tasking @memory_order DSH memory order // instead of fence return_value = *static_cast(a[t]); // relaxed - if (not Impl::atomic_compare_exchange_strong( +#ifdef _WIN32 + Kokkos::memory_fence(); + bool const success = + Kokkos::atomic_compare_exchange_strong(&m_top, t, t + 1); + Kokkos::memory_fence(); + if (!success) { + return_value = nullptr; + } +#else + if (!Impl::atomic_compare_exchange_strong( &m_top, t, t + 1, memory_order_seq_cst, memory_order_relaxed)) { return_value = nullptr; } +#endif } return return_value; } diff --git a/core/src/impl/Kokkos_ClockTic.hpp b/core/src/impl/Kokkos_ClockTic.hpp index ad13fb7548d..386b5918d03 100644 --- a/core/src/impl/Kokkos_ClockTic.hpp +++ b/core/src/impl/Kokkos_ClockTic.hpp @@ -75,7 +75,10 @@ uint64_t clock_tic(void) noexcept { #elif defined(__HCC_ACCELERATOR__) // Get clock register return hc::__clock_u64(); - +#elif defined(KOKKOS_ENABLE_OPENMPTARGET) + return (uint64_t)std::chrono::high_resolution_clock::now() + .time_since_epoch() + .count(); #elif defined(__i386__) || defined(__x86_64) // Return value of 64-bit hi-res clock register. diff --git a/core/src/impl/Kokkos_Core.cpp b/core/src/impl/Kokkos_Core.cpp index 53f3fd9ce20..9640e0fccb7 100644 --- a/core/src/impl/Kokkos_Core.cpp +++ b/core/src/impl/Kokkos_Core.cpp @@ -50,19 +50,122 @@ #include #include #include +#include +#include #include +#ifndef _WIN32 #include +#endif //---------------------------------------------------------------------------- - namespace { bool g_is_initialized = false; bool g_show_warnings = true; -std::stack > finalize_hooks; +// When compiling with clang/LLVM and using the GNU (GCC) C++ Standard Library +// (any recent version between GCC 7.3 and GCC 9.2), std::deque SEGV's during +// the unwinding of the atexit(3C) handlers at program termination. However, +// this bug is not observable when building with GCC. +// As an added bonus, std::list provides constant insertion and +// deletion time complexity, which translates to better run-time performance. As +// opposed to std::deque which does not provide the same constant time +// complexity for inserts/removals, since std::deque is implemented as a +// segmented array. +using hook_function_type = std::function; +std::stack> finalize_hooks; } // namespace namespace Kokkos { namespace Impl { + +int get_ctest_gpu(const char* local_rank_str) { + auto const* ctest_kokkos_device_type = + std::getenv("CTEST_KOKKOS_DEVICE_TYPE"); + if (!ctest_kokkos_device_type) { + return 0; + } + + auto const* ctest_resource_group_count_str = + std::getenv("CTEST_RESOURCE_GROUP_COUNT"); + if (!ctest_resource_group_count_str) { + return 0; + } + + // Make sure rank is within bounds of resource groups specified by CTest + auto resource_group_count = std::atoi(ctest_resource_group_count_str); + auto local_rank = std::atoi(local_rank_str); + if (local_rank >= resource_group_count) { + std::ostringstream ss; + ss << "Error: local rank " << local_rank + << " is outside the bounds of resource groups provided by CTest. Raised" + << " by Kokkos::Impl::get_ctest_gpu()."; + throw_runtime_exception(ss.str()); + } + + // Get the resource types allocated to this resource group + std::ostringstream ctest_resource_group; + ctest_resource_group << "CTEST_RESOURCE_GROUP_" << local_rank; + std::string ctest_resource_group_name = ctest_resource_group.str(); + auto const* ctest_resource_group_str = + std::getenv(ctest_resource_group_name.c_str()); + if (!ctest_resource_group_str) { + std::ostringstream ss; + ss << "Error: " << ctest_resource_group_name << " is not specified. Raised" + << " by Kokkos::Impl::get_ctest_gpu()."; + throw_runtime_exception(ss.str()); + } + + // Look for the device type specified in CTEST_KOKKOS_DEVICE_TYPE + bool found_device = false; + std::string ctest_resource_group_cxx_str = ctest_resource_group_str; + std::istringstream instream(ctest_resource_group_cxx_str); + while (true) { + std::string devName; + std::getline(instream, devName, ','); + if (devName == ctest_kokkos_device_type) { + found_device = true; + break; + } + if (instream.eof() || devName.length() == 0) { + break; + } + } + + if (!found_device) { + std::ostringstream ss; + ss << "Error: device type '" << ctest_kokkos_device_type + << "' not included in " << ctest_resource_group_name + << ". Raised by Kokkos::Impl::get_ctest_gpu()."; + throw_runtime_exception(ss.str()); + } + + // Get the device ID + std::string ctest_device_type_upper = ctest_kokkos_device_type; + for (auto& c : ctest_device_type_upper) { + c = std::toupper(c); + } + ctest_resource_group << "_" << ctest_device_type_upper; + + std::string ctest_resource_group_id_name = ctest_resource_group.str(); + auto resource_str = std::getenv(ctest_resource_group_id_name.c_str()); + if (!resource_str) { + std::ostringstream ss; + ss << "Error: " << ctest_resource_group_id_name + << " is not specified. Raised by Kokkos::Impl::get_ctest_gpu()."; + throw_runtime_exception(ss.str()); + } + + auto const* comma = std::strchr(resource_str, ','); + if (!comma || strncmp(resource_str, "id:", 3)) { + std::ostringstream ss; + ss << "Error: invalid value of " << ctest_resource_group_id_name << ": '" + << resource_str << "'. Raised by Kokkos::Impl::get_ctest_gpu()."; + throw_runtime_exception(ss.str()); + } + + std::string id(resource_str + 3, comma - resource_str - 3); + return std::atoi(id.c_str()); +} + namespace { bool is_unsigned_int(const char* str) { @@ -74,7 +177,8 @@ bool is_unsigned_int(const char* str) { } return true; } -void initialize_internal(const InitArguments& args) { + +void initialize_backends(const InitArguments& args) { // This is an experimental setting // For KNL in Flat mode this variable should be set, so that // memkind allocates high bandwidth memory correctly. @@ -82,10 +186,6 @@ void initialize_internal(const InitArguments& args) { setenv("MEMKIND_HBW_NODES", "1", 0); #endif - if (args.disable_warnings) { - g_show_warnings = false; - } - // Protect declarations, to prevent "unused variable" warnings. #if defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_THREADS) || \ defined(KOKKOS_ENABLE_OPENMPTARGET) || defined(KOKKOS_ENABLE_HPX) @@ -94,25 +194,39 @@ void initialize_internal(const InitArguments& args) { #if defined(KOKKOS_ENABLE_THREADS) || defined(KOKKOS_ENABLE_OPENMPTARGET) const int use_numa = args.num_numa; #endif -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) || \ + defined(KOKKOS_ENABLE_HIP) int use_gpu = args.device_id; const int ndevices = args.ndevices; const int skip_device = args.skip_device; // if the exact device is not set, but ndevices was given, assign round-robin // using on-node MPI rank - if (use_gpu < 0 && ndevices >= 0) { - auto local_rank_str = std::getenv("OMPI_COMM_WORLD_LOCAL_RANK"); // OpenMPI + if (use_gpu < 0) { + auto const* local_rank_str = + std::getenv("OMPI_COMM_WORLD_LOCAL_RANK"); // OpenMPI if (!local_rank_str) local_rank_str = std::getenv("MV2_COMM_WORLD_LOCAL_RANK"); // MVAPICH2 if (!local_rank_str) local_rank_str = std::getenv("SLURM_LOCALID"); // SLURM - if (local_rank_str) { - auto local_rank = std::atoi(local_rank_str); - use_gpu = local_rank % ndevices; - } else { - // user only gave us ndevices, but the MPI environment variable wasn't - // set. start with GPU 0 at this point - use_gpu = 0; + + auto const* ctest_kokkos_device_type = + std::getenv("CTEST_KOKKOS_DEVICE_TYPE"); // CTest + auto const* ctest_resource_group_count_str = + std::getenv("CTEST_RESOURCE_GROUP_COUNT"); // CTest + if (ctest_kokkos_device_type && ctest_resource_group_count_str && + local_rank_str) { + // Use the device assigned by CTest + use_gpu = get_ctest_gpu(local_rank_str); + } else if (ndevices >= 0) { + // Use the device assigned by the rank + if (local_rank_str) { + auto local_rank = std::atoi(local_rank_str); + use_gpu = local_rank % ndevices; + } else { + // user only gave use ndevices, but the MPI environment variable wasn't + // set. start with GPU 0 at this point + use_gpu = 0; + } } // shift assignments over by one so no one is assigned to "skip_device" if (use_gpu >= skip_device) ++use_gpu; @@ -199,17 +313,9 @@ void initialize_internal(const InitArguments& args) { #endif #if defined(KOKKOS_ENABLE_OPENMPTARGET) - if (Impl::is_same::value) { - if (num_threads > 0) { - if (use_numa > 0) { - Kokkos::Experimental::OpenMPTarget::initialize(num_threads, use_numa); - } else { - Kokkos::Experimental::OpenMPTarget::initialize(num_threads); - } - } else { - Kokkos::Experimental::OpenMPTarget::initialize(); - } + if (std::is_same::value) { + Kokkos::Experimental::OpenMPTarget().impl_initialize(); // std::cout << "Kokkos::initialize() fyi: OpenMP enabled and initialized" // << std::endl ; } else { @@ -254,6 +360,23 @@ void initialize_internal(const InitArguments& args) { } #endif +#if defined(KOKKOS_ENABLE_HIP) + if (std::is_same::value || + 0 < use_gpu) { + if (use_gpu > -1) { + Kokkos::Experimental::HIP::impl_initialize( + Kokkos::Experimental::HIP::SelectDevice(use_gpu)); + } else { + Kokkos::Experimental::HIP::impl_initialize(); + } + std::cout << "Kokkos::initialize() fyi: HIP enabled and initialized" + << std::endl; + } +#endif +} + +void initialize_profiling(const InitArguments&) { #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); #else @@ -263,9 +386,23 @@ void initialize_internal(const InitArguments& args) { << std::endl; } #endif +} + +void pre_initialize_internal(const InitArguments& args) { + if (args.disable_warnings) g_show_warnings = false; +} + +void post_initialize_internal(const InitArguments& args) { + initialize_profiling(args); g_is_initialized = true; } +void initialize_internal(const InitArguments& args) { + pre_initialize_internal(args); + initialize_backends(args); + post_initialize_internal(args); +} + void finalize_internal(const bool all_spaces = false) { typename decltype(finalize_hooks)::size_type numSuccessfulCalls = 0; while (!finalize_hooks.empty()) { @@ -320,12 +457,20 @@ void finalize_internal(const bool all_spaces = false) { } #endif +#if defined(KOKKOS_ENABLE_HIP) + if (std::is_same::value || + all_spaces) { + if (Kokkos::Experimental::HIP::impl_is_initialized()) + Kokkos::Experimental::HIP::impl_finalize(); + } +#endif #if defined(KOKKOS_ENABLE_OPENMPTARGET) if (std::is_same::value || all_spaces) { - if (Kokkos::Experimental::OpenMPTarget::is_initialized()) - Kokkos::Experimental::OpenMPTarget::finalize(); + if (Kokkos::Experimental::OpenMPTarget().impl_is_initialized()) + Kokkos::Experimental::OpenMPTarget().impl_finalize(); } #endif @@ -380,23 +525,19 @@ void finalize_internal(const bool all_spaces = false) { void fence_internal() { #if defined(KOKKOS_ENABLE_CUDA) - if (std::is_same::value) { - Kokkos::Cuda::impl_static_fence(); - } + Kokkos::Cuda::impl_static_fence(); #endif #if defined(KOKKOS_ENABLE_ROCM) - if (std::is_same::value) { - Kokkos::Experimental::ROCm().fence(); - } + Kokkos::Experimental::ROCm().fence(); +#endif + +#if defined(KOKKOS_ENABLE_HIP) + Kokkos::Experimental::HIP().fence(); #endif #if defined(KOKKOS_ENABLE_OPENMP) - if (std::is_same::value || - std::is_same::value) { - Kokkos::OpenMP::impl_static_fence(); - } + Kokkos::OpenMP::impl_static_fence(); #endif #if defined(KOKKOS_ENABLE_HPX) @@ -404,18 +545,11 @@ void fence_internal() { #endif #if defined(KOKKOS_ENABLE_THREADS) - if (std::is_same::value || - std::is_same::value) { - Kokkos::Threads::impl_static_fence(); - } + Kokkos::Threads::impl_static_fence(); #endif #if defined(KOKKOS_ENABLE_SERIAL) - if (std::is_same::value || - std::is_same::value) { - Kokkos::Serial::impl_static_fence(); - } + Kokkos::Serial::impl_static_fence(); #endif } @@ -454,22 +588,31 @@ bool check_int_arg(char const* arg, char const* expected, int* value) { return true; } -} // namespace - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- +void warn_deprecated_command_line_argument(std::string deprecated, + std::string valid) { + std::cerr + << "Warning: command line argument '" << deprecated + << "' is deprecated. Use '" << valid + << "' instead. Raised by Kokkos::initialize(int narg, char* argc[])." + << std::endl; +} -namespace Kokkos { +unsigned get_process_id() { +#ifdef _WIN32 + return unsigned(GetCurrentProcessId()); +#else + return unsigned(getpid()); +#endif +} -void initialize(int& narg, char* arg[]) { - int num_threads = -1; - int numa = -1; - int device = -1; - int ndevices = -1; - int skip_device = 9999; - bool disable_warnings = false; +void parse_command_line_arguments(int& narg, char* arg[], + InitArguments& arguments) { + auto& num_threads = arguments.num_threads; + auto& numa = arguments.num_numa; + auto& device = arguments.device_id; + auto& ndevices = arguments.ndevices; + auto& skip_device = arguments.skip_device; + auto& disable_warnings = arguments.disable_warnings; int kokkos_threads_found = 0; int kokkos_numa_found = 0; @@ -479,76 +622,97 @@ void initialize(int& narg, char* arg[]) { int iarg = 0; while (iarg < narg) { - if (Impl::check_int_arg(arg[iarg], "--kokkos-threads", &num_threads)) { + if (check_int_arg(arg[iarg], "--kokkos-threads", &num_threads)) { for (int k = iarg; k < narg - 1; k++) { arg[k] = arg[k + 1]; } kokkos_threads_found = 1; narg--; } else if (!kokkos_threads_found && - Impl::check_int_arg(arg[iarg], "--threads", &num_threads)) { + check_int_arg(arg[iarg], "--threads", &num_threads)) { iarg++; - } else if (Impl::check_int_arg(arg[iarg], "--kokkos-numa", &numa)) { + } else if (check_int_arg(arg[iarg], "--kokkos-numa", &numa)) { for (int k = iarg; k < narg - 1; k++) { arg[k] = arg[k + 1]; } kokkos_numa_found = 1; narg--; } else if (!kokkos_numa_found && - Impl::check_int_arg(arg[iarg], "--numa", &numa)) { + check_int_arg(arg[iarg], "--numa", &numa)) { iarg++; - } else if (Impl::check_int_arg(arg[iarg], "--kokkos-device", &device)) { + } else if (check_int_arg(arg[iarg], "--kokkos-device-id", &device) || + check_int_arg(arg[iarg], "--kokkos-device", &device)) { + if (check_arg(arg[iarg], "--kokkos-device")) { + warn_deprecated_command_line_argument("--kokkos-device", + "--kokkos-device-id"); + } for (int k = iarg; k < narg - 1; k++) { arg[k] = arg[k + 1]; } kokkos_device_found = 1; narg--; } else if (!kokkos_device_found && - Impl::check_int_arg(arg[iarg], "--device", &device)) { + (check_int_arg(arg[iarg], "--device-id", &device) || + check_int_arg(arg[iarg], "--device", &device))) { + if (check_arg(arg[iarg], "--device")) { + warn_deprecated_command_line_argument("--device", "--device-id"); + } iarg++; - } else if (Impl::check_arg(arg[iarg], "--kokkos-ndevices") || - Impl::check_arg(arg[iarg], "--ndevices")) { + } else if (check_arg(arg[iarg], "--kokkos-num-devices") || + check_arg(arg[iarg], "--num-devices") || + check_arg(arg[iarg], "--kokkos-ndevices") || + check_arg(arg[iarg], "--ndevices")) { + if (check_arg(arg[iarg], "--ndevices")) { + warn_deprecated_command_line_argument("--ndevices", "--num-devices"); + } + if (check_arg(arg[iarg], "--kokkos-ndevices")) { + warn_deprecated_command_line_argument("--kokkos-ndevices", + "--kokkos-num-devices"); + } // Find the number of device (expecting --device=XX) - if (!((strncmp(arg[iarg], "--kokkos-ndevices=", 18) == 0) || + if (!((strncmp(arg[iarg], "--kokkos-num-devices=", 21) == 0) || + (strncmp(arg[iarg], "--num-ndevices=", 14) == 0) || + (strncmp(arg[iarg], "--kokkos-ndevices=", 18) == 0) || (strncmp(arg[iarg], "--ndevices=", 11) == 0))) - Impl::throw_runtime_exception( + throw_runtime_exception( "Error: expecting an '=INT[,INT]' after command line argument " - "'--ndevices/--kokkos-ndevices'. Raised by Kokkos::initialize(int " - "narg, char* argc[])."); + "'--num-devices/--kokkos-num-devices'. Raised by " + "Kokkos::initialize(int narg, char* argc[])."); char* num1 = strchr(arg[iarg], '=') + 1; char* num2 = strpbrk(num1, ","); - int num1_len = num2 == NULL ? strlen(num1) : num2 - num1; + int num1_len = num2 == nullptr ? strlen(num1) : num2 - num1; char* num1_only = new char[num1_len + 1]; strncpy(num1_only, num1, num1_len); num1_only[num1_len] = 0; - if (!Impl::is_unsigned_int(num1_only) || (strlen(num1_only) == 0)) { - Impl::throw_runtime_exception( + if (!is_unsigned_int(num1_only) || (strlen(num1_only) == 0)) { + throw_runtime_exception( "Error: expecting an integer number after command line argument " - "'--kokkos-ndevices'. Raised by Kokkos::initialize(int narg, char* " - "argc[])."); + "'--kokkos-numdevices'. Raised by " + "Kokkos::initialize(int narg, char* argc[])."); } - if ((strncmp(arg[iarg], "--kokkos-ndevices", 17) == 0) || - !kokkos_ndevices_found) + if (check_arg(arg[iarg], "--kokkos-num-devices") || + check_arg(arg[iarg], "--kokkos-ndevices") || !kokkos_ndevices_found) ndevices = atoi(num1_only); delete[] num1_only; - if (num2 != NULL) { - if ((!Impl::is_unsigned_int(num2 + 1)) || (strlen(num2) == 1)) - Impl::throw_runtime_exception( + if (num2 != nullptr) { + if ((!is_unsigned_int(num2 + 1)) || (strlen(num2) == 1)) + throw_runtime_exception( "Error: expecting an integer number after command line argument " - "'--kokkos-ndevices=XX,'. Raised by Kokkos::initialize(int narg, " - "char* argc[])."); + "'--kokkos-num-devices=XX,'. Raised by " + "Kokkos::initialize(int narg, char* argc[])."); - if ((strncmp(arg[iarg], "--kokkos-ndevices", 17) == 0) || - !kokkos_ndevices_found) + if (check_arg(arg[iarg], "--kokkos-num-devices") || + check_arg(arg[iarg], "--kokkos-ndevices") || !kokkos_ndevices_found) skip_device = atoi(num2 + 1); } - // Remove the --kokkos-ndevices argument from the list but leave - // --ndevices - if (strncmp(arg[iarg], "--kokkos-ndevices", 17) == 0) { + // Remove the --kokkos-num-devices argument from the list but leave + // --num-devices + if (check_arg(arg[iarg], "--kokkos-num-devices") || + check_arg(arg[iarg], "--kokkos-ndevices")) { for (int k = iarg; k < narg - 1; k++) { arg[k] = arg[k + 1]; } @@ -557,88 +721,45 @@ void initialize(int& narg, char* arg[]) { } else { iarg++; } - } else if (strcmp(arg[iarg], "--kokkos-disable-warnings") == 0) { + } else if (check_arg(arg[iarg], "--kokkos-disable-warnings")) { disable_warnings = true; for (int k = iarg; k < narg - 1; k++) { arg[k] = arg[k + 1]; } narg--; - } else if ((strcmp(arg[iarg], "--kokkos-help") == 0) || - (strcmp(arg[iarg], "--help") == 0)) { - std::cout << std::endl; - std::cout << "-----------------------------------------------------------" - "---------------------" - << std::endl; - std::cout << "-------------Kokkos command line " - "arguments--------------------------------------" - << std::endl; - std::cout << "-----------------------------------------------------------" - "---------------------" - << std::endl; - std::cout << "The following arguments exist also without prefix 'kokkos' " - "(e.g. --help)." - << std::endl; - std::cout << "The prefixed arguments will be removed from the list by " - "Kokkos::initialize()," - << std::endl; - std::cout << "the non-prefixed ones are not removed. Prefixed versions " - "take precedence over " - << std::endl; - std::cout << "non prefixed ones, and the last occurrence of an argument " - "overwrites prior" - << std::endl; - std::cout << "settings." << std::endl; - std::cout << std::endl; - std::cout << "--kokkos-help : print this message" - << std::endl; - std::cout - << "--kokkos-disable-warnings : disable kokkos warning messages" - << std::endl; - std::cout - << "--kokkos-threads=INT : specify total number of threads or" - << std::endl; - std::cout << " number of threads per NUMA " - "region if " - << std::endl; - std::cout << " used in conjunction with " - "'--numa' option. " - << std::endl; - std::cout << "--kokkos-numa=INT : specify number of NUMA " - "regions used by process." - << std::endl; - std::cout << "--kokkos-device=INT : specify device id to be used " - "by Kokkos. " - << std::endl; - std::cout << "--kokkos-ndevices=INT[,INT] : used when running MPI jobs. " - "Specify number of" - << std::endl; - std::cout << " devices per node to be used. " - "Process to device" - << std::endl; - std::cout << " mapping happens by obtaining " - "the local MPI rank" - << std::endl; - std::cout << " and assigning devices " - "round-robin. The optional" - << std::endl; - std::cout << " second argument allows for " - "an existing device" - << std::endl; - std::cout << " to be ignored. This is most " - "useful on workstations" - << std::endl; - std::cout << " with multiple GPUs of which " - "one is used to drive" - << std::endl; - std::cout << " screen output." << std::endl; - std::cout << std::endl; - std::cout << "-----------------------------------------------------------" - "---------------------" - << std::endl; - std::cout << std::endl; - - // Remove the --kokkos-help argument from the list but leave --ndevices - if (strcmp(arg[iarg], "--kokkos-help") == 0) { + } else if (check_arg(arg[iarg], "--kokkos-help") || + check_arg(arg[iarg], "--help")) { + auto const help_message = R"( + -------------------------------------------------------------------------------- + -------------Kokkos command line arguments-------------------------------------- + -------------------------------------------------------------------------------- + The following arguments exist also without prefix 'kokkos' (e.g. --help). + The prefixed arguments will be removed from the list by Kokkos::initialize(), + the non-prefixed ones are not removed. Prefixed versions take precedence over + non prefixed ones, and the last occurrence of an argument overwrites prior + settings. + + --kokkos-help : print this message + --kokkos-disable-warnings : disable kokkos warning messages + --kokkos-threads=INT : specify total number of threads or + number of threads per NUMA region if + used in conjunction with '--numa' option. + --kokkos-numa=INT : specify number of NUMA regions used by process. + --kokkos-device-id=INT : specify device id to be used by Kokkos. + --kokkos-num-devices=INT[,INT] : used when running MPI jobs. Specify number of + devices per node to be used. Process to device + mapping happens by obtaining the local MPI rank + and assigning devices round-robin. The optional + second argument allows for an existing device + to be ignored. This is most useful on workstations + with multiple GPUs of which one is used to drive + screen output. + -------------------------------------------------------------------------------- +)"; + std::cout << help_message << std::endl; + + // Remove the --kokkos-help argument from the list but leave --help + if (check_arg(arg[iarg], "--kokkos-help")) { for (int k = iarg; k < narg - 1; k++) { arg[k] = arg[k + 1]; } @@ -649,8 +770,16 @@ void initialize(int& narg, char* arg[]) { } else iarg++; } +} + +void parse_environment_variables(InitArguments& arguments) { + auto& num_threads = arguments.num_threads; + auto& numa = arguments.num_numa; + auto& device = arguments.device_id; + auto& ndevices = arguments.ndevices; + auto& skip_device = arguments.skip_device; + auto& disable_warnings = arguments.disable_warnings; - // Read environment variables char* endptr; auto env_num_threads_str = std::getenv("KOKKOS_NUM_THREADS"); if (env_num_threads_str != nullptr) { @@ -783,7 +912,7 @@ void initialize(int& narg, char* arg[]) { "Error: cannot KOKKOS_SKIP_DEVICE the only KOKKOS_RAND_DEVICE. " "Raised by Kokkos::initialize(int narg, char* argc[])."); - std::srand(getpid()); + std::srand(get_process_id()); while (device < 0) { int test_device = std::rand() % rdevices; if (test_device != skip_device) device = test_device; @@ -804,21 +933,40 @@ void initialize(int& narg, char* arg[]) { "KOKKOS_DISABLE_WARNINGS if both are set. Raised by " "Kokkos::initialize(int narg, char* argc[])."); } +} +} // namespace + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +void initialize(int& narg, char* arg[]) { InitArguments arguments; - arguments.num_threads = num_threads; - arguments.num_numa = numa; - arguments.device_id = device; - arguments.ndevices = ndevices; - arguments.skip_device = skip_device; - arguments.disable_warnings = disable_warnings; + Impl::parse_command_line_arguments(narg, arg, arguments); + Impl::parse_environment_variables(arguments); Impl::initialize_internal(arguments); } -void initialize(const InitArguments& arguments) { +void initialize(InitArguments arguments) { + Impl::parse_environment_variables(arguments); Impl::initialize_internal(arguments); } +namespace Impl { + +void pre_initialize(const InitArguments& args) { + pre_initialize_internal(args); +} + +void post_initialize(const InitArguments& args) { + post_initialize_internal(args); +} +} // namespace Impl + void push_finalize_hook(std::function f) { finalize_hooks.push(f); } void finalize() { Impl::finalize_internal(); } @@ -833,6 +981,10 @@ void fence() { Impl::fence_internal(); } void print_configuration(std::ostream& out, const bool detail) { std::ostringstream msg; + msg << "Kokkos Version:" << std::endl; + msg << " " << KOKKOS_VERSION / 10000 << "." << (KOKKOS_VERSION % 10000) / 100 + << "." << KOKKOS_VERSION % 100 << std::endl; + msg << "Compiler:" << std::endl; #ifdef KOKKOS_COMPILER_APPLECC msg << " KOKKOS_COMPILER_APPLECC: " << KOKKOS_COMPILER_APPLECC << std::endl; @@ -882,6 +1034,12 @@ void print_configuration(std::ostream& out, const bool detail) { msg << "yes" << std::endl; #else msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_HIP: "; +#ifdef KOKKOS_ENABLE_HIP + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; #endif msg << " KOKKOS_ENABLE_OPENMP: "; #ifdef KOKKOS_ENABLE_OPENMP @@ -900,12 +1058,6 @@ void print_configuration(std::ostream& out, const bool detail) { msg << "yes" << std::endl; #else msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_QTHREADS: "; -#ifdef KOKKOS_ENABLE_QTHREADS - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; #endif msg << " KOKKOS_ENABLE_SERIAL: "; #ifdef KOKKOS_ENABLE_SERIAL @@ -921,20 +1073,20 @@ void print_configuration(std::ostream& out, const bool detail) { #else msg << "no" << std::endl; #endif - msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP: "; -#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HIP: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_HIP msg << "yes" << std::endl; #else msg << "no" << std::endl; #endif - msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS: "; -#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP msg << "yes" << std::endl; #else msg << "no" << std::endl; #endif - msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS: "; -#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS msg << "yes" << std::endl; #else msg << "no" << std::endl; @@ -1131,12 +1283,25 @@ void print_configuration(std::ostream& out, const bool detail) { msg << "no" << std::endl; #endif +#endif + +#ifdef KOKKOS_ENABLE_HIP + msg << "HIP Options:" << std::endl; + msg << " KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE: "; +#ifdef KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif #endif msg << "\nRuntime Configuration:" << std::endl; #ifdef KOKKOS_ENABLE_CUDA Cuda::print_configuration(msg, detail); #endif +#ifdef KOKKOS_ENABLE_HIP + Experimental::HIP::print_configuration(msg, detail); +#endif #ifdef KOKKOS_ENABLE_OPENMP OpenMP::print_configuration(msg, detail); #endif @@ -1146,9 +1311,6 @@ void print_configuration(std::ostream& out, const bool detail) { #if defined(KOKKOS_ENABLE_THREADS) Threads::print_configuration(msg, detail); #endif -#ifdef KOKKOS_ENABLE_QTHREADS - Qthreads::print_configuration(msg, detail); -#endif #ifdef KOKKOS_ENABLE_SERIAL Serial::print_configuration(msg, detail); #endif diff --git a/core/src/impl/Kokkos_EBO.hpp b/core/src/impl/Kokkos_EBO.hpp index 5adf4bd5f16..39e855a55eb 100644 --- a/core/src/impl/Kokkos_EBO.hpp +++ b/core/src/impl/Kokkos_EBO.hpp @@ -117,21 +117,21 @@ struct EBOBaseImpl { : EBOBaseImpl(_constexpr_14_workaround_no_device_tag{}, T(std::forward(args)...)) {} - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION constexpr EBOBaseImpl(EBOBaseImpl const&) = default; - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION constexpr EBOBaseImpl(EBOBaseImpl&&) = default; - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION KOKKOS_CONSTEXPR_14 EBOBaseImpl& operator=(EBOBaseImpl const&) = default; - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION KOKKOS_CONSTEXPR_14 EBOBaseImpl& operator=(EBOBaseImpl&&) = default; - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION ~EBOBaseImpl() = default; KOKKOS_INLINE_FUNCTION @@ -184,21 +184,21 @@ struct EBOBaseImpl { // TODO @tasking @minor DSH noexcept in the right places? - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION constexpr EBOBaseImpl(EBOBaseImpl const&) = default; - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION constexpr EBOBaseImpl(EBOBaseImpl&&) noexcept = default; - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION KOKKOS_CONSTEXPR_14 EBOBaseImpl& operator=(EBOBaseImpl const&) = default; - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION KOKKOS_CONSTEXPR_14 EBOBaseImpl& operator=(EBOBaseImpl&&) = default; - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION ~EBOBaseImpl() = default; KOKKOS_INLINE_FUNCTION diff --git a/core/src/impl/Kokkos_Error.cpp b/core/src/impl/Kokkos_Error.cpp index 817c9a0581c..a42b916f805 100644 --- a/core/src/impl/Kokkos_Error.cpp +++ b/core/src/impl/Kokkos_Error.cpp @@ -129,6 +129,8 @@ void Experimental::RawMemoryAllocationFailure::print_error_message( o << "cudaMallocManaged()."; break; case AllocationMechanism::CudaHostAlloc: o << "cudaHostAlloc()."; break; + case AllocationMechanism::HIPMalloc: o << "hipMalloc()."; break; + case AllocationMechanism::HIPHostMalloc: o << "hipHostMalloc()."; break; } append_additional_error_information(o); o << ")" << std::endl; diff --git a/core/src/impl/Kokkos_Error.hpp b/core/src/impl/Kokkos_Error.hpp index 44f77710426..41be6737e7b 100644 --- a/core/src/impl/Kokkos_Error.hpp +++ b/core/src/impl/Kokkos_Error.hpp @@ -51,6 +51,9 @@ #ifdef KOKKOS_ENABLE_CUDA #include #endif +#ifdef KOKKOS_ENABLE_HIP +#include +#endif #ifndef KOKKOS_ABORT_MESSAGE_BUFFER_SIZE #define KOKKOS_ABORT_MESSAGE_BUFFER_SIZE 2048 @@ -87,7 +90,9 @@ class RawMemoryAllocationFailure : public std::bad_alloc { IntelMMAlloc, CudaMalloc, CudaMallocManaged, - CudaHostAlloc + CudaHostAlloc, + HIPMalloc, + HIPHostMalloc }; private: @@ -124,7 +129,7 @@ class RawMemoryAllocationFailure : public std::bad_alloc { const char *what() const noexcept override { if (m_failure_mode == FailureMode::OutOfMemoryError) { return "Memory allocation error: out of memory"; - } else if (m_failure_mode == FailureMode::OutOfMemoryError) { + } else if (m_failure_mode == FailureMode::AllocationNotAligned) { return "Memory allocation error: allocation result was under-aligned"; } @@ -164,11 +169,11 @@ KOKKOS_INLINE_FUNCTION void abort(const char *const message) { #if defined(KOKKOS_ENABLE_CUDA) && defined(__CUDA_ARCH__) Kokkos::Impl::cuda_abort(message); -#else -#if !defined(KOKKOS_ENABLE_OPENMPTARGET) && !defined(__HCC_ACCELERATOR__) +#elif defined(KOKKOS_ENABLE_HIP) && defined(__HIP_DEVICE_COMPILE__) + Kokkos::Impl::hip_abort(message); +#elif !defined(KOKKOS_ENABLE_OPENMPTARGET) && !defined(__HCC_ACCELERATOR__) Kokkos::Impl::host_abort(message); #endif -#endif } } // namespace Kokkos diff --git a/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp b/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp index a304e8eff26..2651229a706 100644 --- a/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp +++ b/core/src/impl/Kokkos_FixedBufferMemoryPool.hpp @@ -126,18 +126,19 @@ class FixedBlockSizeMemoryPool actual_size) { /* forwarding ctor, must be empty */ } - KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool() = default; - KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool&&) = - default; - KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool( - FixedBlockSizeMemoryPool const&) = default; - KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=( + KOKKOS_DEFAULTED_FUNCTION FixedBlockSizeMemoryPool() = default; + KOKKOS_DEFAULTED_FUNCTION FixedBlockSizeMemoryPool( FixedBlockSizeMemoryPool&&) = default; - KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=( + KOKKOS_DEFAULTED_FUNCTION FixedBlockSizeMemoryPool( + FixedBlockSizeMemoryPool const&) = default; + KOKKOS_DEFAULTED_FUNCTION FixedBlockSizeMemoryPool& operator=( + FixedBlockSizeMemoryPool&&) = default; + KOKKOS_DEFAULTED_FUNCTION FixedBlockSizeMemoryPool& operator=( FixedBlockSizeMemoryPool const&) = default; KOKKOS_INLINE_FUNCTION void* allocate(size_type alloc_size) const noexcept { + (void)alloc_size; KOKKOS_EXPECTS(alloc_size <= Size); auto free_idx_counter = Kokkos::atomic_fetch_add( (volatile size_type*)&m_first_free_idx, size_type(1)); @@ -161,7 +162,7 @@ class FixedBlockSizeMemoryPool } KOKKOS_INLINE_FUNCTION - void deallocate(void* ptr, size_type alloc_size) const noexcept { + void deallocate(void* ptr, size_type /*alloc_size*/) const noexcept { // figure out which block we are auto offset = intptr_t(ptr) - intptr_t(m_first_block); @@ -250,11 +251,11 @@ class FixedBlockSizeChaseLevMemoryPool ) : FixedBlockSizeMemoryPool(mem_space, mempool_capacity / actual_size) { /* forwarding ctor, must be empty */ } - KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool() = default; - KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool&&) = default; - KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool const&) = default; - KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool&&) = default; - KOKKOS_INLINE_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool const&) = default; + KOKKOS_DEFAULTED_FUNCTION FixedBlockSizeMemoryPool() = default; + KOKKOS_DEFAULTED_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool&&) = default; + KOKKOS_DEFAULTED_FUNCTION FixedBlockSizeMemoryPool(FixedBlockSizeMemoryPool const&) = default; + KOKKOS_DEFAULTED_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool&&) = default; + KOKKOS_DEFAULTED_FUNCTION FixedBlockSizeMemoryPool& operator=(FixedBlockSizeMemoryPool const&) = default; KOKKOS_INLINE_FUNCTION diff --git a/core/src/impl/Kokkos_FunctorAdapter.hpp b/core/src/impl/Kokkos_FunctorAdapter.hpp index e981e903664..b777dac0217 100644 --- a/core/src/impl/Kokkos_FunctorAdapter.hpp +++ b/core/src/impl/Kokkos_FunctorAdapter.hpp @@ -64,7 +64,7 @@ struct ReduceFunctorHasInit { template struct ReduceFunctorHasInit< FunctorType, - typename Impl::enable_if<0 < sizeof(&FunctorType::init)>::type> { + typename std::enable_if<0 < sizeof(&FunctorType::init)>::type> { enum { value = true }; }; @@ -76,7 +76,7 @@ struct ReduceFunctorHasJoin { template struct ReduceFunctorHasJoin< FunctorType, - typename Impl::enable_if<0 < sizeof(&FunctorType::join)>::type> { + typename std::enable_if<0 < sizeof(&FunctorType::join)>::type> { enum { value = true }; }; @@ -88,7 +88,7 @@ struct ReduceFunctorHasFinal { template struct ReduceFunctorHasFinal< FunctorType, - typename Impl::enable_if<0 < sizeof(&FunctorType::final)>::type> { + typename std::enable_if<0 < sizeof(&FunctorType::final)>::type> { enum { value = true }; }; @@ -100,18 +100,18 @@ struct ReduceFunctorHasShmemSize { template struct ReduceFunctorHasShmemSize< FunctorType, - typename Impl::enable_if<0 < sizeof(&FunctorType::team_shmem_size)>::type> { + typename std::enable_if<0 < sizeof(&FunctorType::team_shmem_size)>::type> { enum { value = true }; }; template -struct FunctorDeclaresValueType : public Impl::false_type {}; +struct FunctorDeclaresValueType : public std::false_type {}; template struct FunctorDeclaresValueType< FunctorType, ArgTag, typename Impl::enable_if_type::type> - : public Impl::true_type {}; + : public std::true_type {}; template ::value) || @@ -174,18 +174,19 @@ struct FunctorValueTraits { template struct FunctorValueTraits { - typedef typename Impl::remove_extent::type + typedef typename std::remove_extent::type value_type; typedef FunctorType functor_type; - static_assert(0 == (sizeof(value_type) % sizeof(int)), + static_assert((sizeof(value_type) < sizeof(int)) || + 0 == (sizeof(value_type) % sizeof(int)), "Reduction functor's declared value_type requires: 0 == " "sizeof(value_type) % sizeof(int)"); /* this cast to bool is needed for correctness by NVCC */ enum : bool { IsArray = static_cast( - Impl::is_array::value) + std::is_array::value) }; // If not an array then what is the sizeof(value_type) @@ -202,8 +203,8 @@ struct FunctorValueTraits KOKKOS_FORCEINLINE_FUNCTION static - typename Impl::enable_if::value && !IsArray, - unsigned>::type + typename std::enable_if::value && !IsArray, + unsigned>::type value_count(const F&) { return 1; } @@ -213,8 +214,8 @@ struct FunctorValueTraits KOKKOS_FORCEINLINE_FUNCTION static - typename Impl::enable_if::value && IsArray, - unsigned>::type + typename std::enable_if::value && IsArray, + unsigned>::type value_count(const F& f) { return f.value_count; } @@ -1412,8 +1413,10 @@ struct FunctorValueInit< // First substitution failure when FunctorType::init does not exist. // Second substitution failure when FunctorType::init is not compatible. , - decltype(FunctorValueInitFunction::enable_if( - &FunctorType::init))> { + typename std::enable_if< + !std::is_same::value, + decltype(FunctorValueInitFunction::enable_if( + &FunctorType::init))>::type> { KOKKOS_FORCEINLINE_FUNCTION static T& init(const FunctorType& f, void* p) { f.init(ArgTag(), *((T*)p)); return *((T*)p); @@ -1428,8 +1431,10 @@ struct FunctorValueInit< // First substitution failure when FunctorType::init does not exist. // Second substitution failure when FunctorType::init is not compatible , - decltype(FunctorValueInitFunction::enable_if( - &FunctorType::init))> { + typename std::enable_if< + !std::is_same::value, + decltype(FunctorValueInitFunction::enable_if( + &FunctorType::init))>::type> { KOKKOS_FORCEINLINE_FUNCTION static T* init(const FunctorType& f, void* p) { f.init(ArgTag(), (T*)p); return (T*)p; @@ -1525,7 +1530,7 @@ struct FunctorValueJoin { KOKKOS_FORCEINLINE_FUNCTION FunctorValueJoin(const FunctorType&) {} - KOKKOS_FORCEINLINE_FUNCTION static void join(const FunctorType& f, + KOKKOS_FORCEINLINE_FUNCTION static void join(const FunctorType& /*f*/, volatile void* const lhs, const volatile void* const rhs) { *((volatile T*)lhs) += *((const volatile T*)rhs); @@ -1760,8 +1765,8 @@ template struct JoinAdd { typedef ValueType value_type; - KOKKOS_INLINE_FUNCTION - JoinAdd() {} + KOKKOS_DEFAULTED_FUNCTION + JoinAdd() = default; KOKKOS_INLINE_FUNCTION void join(volatile value_type& dst, const volatile value_type& src) const { diff --git a/core/src/impl/Kokkos_FunctorAnalysis.hpp b/core/src/impl/Kokkos_FunctorAnalysis.hpp index a8f3b5042ac..827a9f346d3 100644 --- a/core/src/impl/Kokkos_FunctorAnalysis.hpp +++ b/core/src/impl/Kokkos_FunctorAnalysis.hpp @@ -797,7 +797,7 @@ struct FunctorAnalysis { using rebind = Reducer; KOKKOS_INLINE_FUNCTION explicit constexpr Reducer( - Functor const* arg_functor = 0, ValueType* arg_value = 0) noexcept + Functor const* arg_functor = 0, ValueType* arg_value = nullptr) noexcept : m_functor(arg_functor), m_result(arg_value) {} }; }; diff --git a/core/src/impl/Kokkos_HBWSpace.cpp b/core/src/impl/Kokkos_HBWSpace.cpp index 8a83aef4c9f..9b5bee22791 100644 --- a/core/src/impl/Kokkos_HBWSpace.cpp +++ b/core/src/impl/Kokkos_HBWSpace.cpp @@ -136,8 +136,8 @@ void *HBWSpace::allocate(const size_t arg_alloc_size) const { case STD_MALLOC: msg << "STD_MALLOC"; break; } msg << " ]( " << arg_alloc_size << " ) FAILED"; - if (ptr == NULL) { - msg << " NULL"; + if (ptr == nullptr) { + msg << " nullptr"; } else { msg << " NOT ALIGNED " << ptr; } @@ -181,7 +181,12 @@ void SharedAllocationRecord::deallocate( } SharedAllocationRecord::~SharedAllocationRecord() { + void>::~SharedAllocationRecord() +#if defined( \ + KOKKOS_IMPL_INTEL_WORKAROUND_NOEXCEPT_SPECIFICATION_VIRTUAL_FUNCTION) + noexcept +#endif +{ #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::deallocateData( diff --git a/core/src/impl/Kokkos_HostBarrier.cpp b/core/src/impl/Kokkos_HostBarrier.cpp index 0b96add432f..55d70985dcd 100644 --- a/core/src/impl/Kokkos_HostBarrier.cpp +++ b/core/src/impl/Kokkos_HostBarrier.cpp @@ -92,7 +92,7 @@ void HostBarrier::impl_backoff_wait_until_equal( #endif } #else // _WIN32 - while (!try_wait()) { + while (!test_equal(ptr, v)) { #if defined(KOKKOS_ENABLE_ASM) for (int j = 0; j < num_nops; ++j) { __asm__ __volatile__("nop\n"); diff --git a/core/src/impl/Kokkos_HostBarrier.hpp b/core/src/impl/Kokkos_HostBarrier.hpp index 923fbc17039..4b9235ab702 100644 --- a/core/src/impl/Kokkos_HostBarrier.hpp +++ b/core/src/impl/Kokkos_HostBarrier.hpp @@ -70,7 +70,7 @@ namespace Impl { // called split_release // // The purporse of the split functions is to allow the last thread to arrive -// an opprotunity to perform some actions before releasing the waiting threads +// an opportunity to perform some actions before releasing the waiting threads // // If all threads have arrived (and split_release has been call if using // split_arrive) before a wait type call, the wait may return quickly @@ -235,6 +235,7 @@ class HostBarrier { impl_backoff_wait_until_equal(ptr, v, active_wait); } #else + (void)active_wait; while (!test_equal(ptr, v)) { } #endif diff --git a/core/src/impl/Kokkos_HostSpace.cpp b/core/src/impl/Kokkos_HostSpace.cpp index 94490b83001..59d14e53922 100644 --- a/core/src/impl/Kokkos_HostSpace.cpp +++ b/core/src/impl/Kokkos_HostSpace.cpp @@ -221,18 +221,19 @@ void *HostSpace::allocate(const size_t arg_alloc_size) const { // read write access to private memory - ptr = mmap(NULL /* address hint, if NULL OS kernel chooses address */ - , - arg_alloc_size /* size in bytes */ - , - prot /* memory protection */ - , - flags /* visibility of updates */ - , - -1 /* file descriptor */ - , - 0 /* offset */ - ); + ptr = + mmap(nullptr /* address hint, if nullptr OS kernel chooses address */ + , + arg_alloc_size /* size in bytes */ + , + prot /* memory protection */ + , + flags /* visibility of updates */ + , + -1 /* file descriptor */ + , + 0 /* offset */ + ); /* Associated reallocation: ptr = mremap( old_ptr , old_size , new_size , MREMAP_MAYMOVE ); @@ -325,7 +326,12 @@ void SharedAllocationRecord::deallocate( delete static_cast(arg_rec); } -SharedAllocationRecord::~SharedAllocationRecord() { +SharedAllocationRecord::~SharedAllocationRecord() +#if defined( \ + KOKKOS_IMPL_INTEL_WORKAROUND_NOEXCEPT_SPECIFICATION_VIRTUAL_FUNCTION) + noexcept +#endif +{ #if defined(KOKKOS_ENABLE_PROFILING) if (Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::deallocateData( @@ -397,7 +403,7 @@ SharedAllocationRecord::SharedAllocationRecord( void *SharedAllocationRecord::allocate_tracked( const Kokkos::HostSpace &arg_space, const std::string &arg_alloc_label, const size_t arg_alloc_size) { - if (!arg_alloc_size) return (void *)nullptr; + if (!arg_alloc_size) return nullptr; SharedAllocationRecord *const r = allocate(arg_space, arg_alloc_label, arg_alloc_size); @@ -409,7 +415,7 @@ void *SharedAllocationRecord::allocate_tracked( void SharedAllocationRecord::deallocate_tracked( void *const arg_alloc_ptr) { - if (arg_alloc_ptr != 0) { + if (arg_alloc_ptr != nullptr) { SharedAllocationRecord *const r = get_record(arg_alloc_ptr); RecordBase::decrement(r); @@ -437,9 +443,9 @@ SharedAllocationRecord::get_record(void *alloc_ptr) { typedef SharedAllocationRecord RecordHost; SharedAllocationHeader const *const head = - alloc_ptr ? Header::get_header(alloc_ptr) : (SharedAllocationHeader *)0; + alloc_ptr ? Header::get_header(alloc_ptr) : nullptr; RecordHost *const record = - head ? static_cast(head->m_record) : (RecordHost *)0; + head ? static_cast(head->m_record) : nullptr; if (!alloc_ptr || record->m_alloc_ptr != head) { Kokkos::Impl::throw_runtime_exception( diff --git a/core/src/impl/Kokkos_HostThreadTeam.cpp b/core/src/impl/Kokkos_HostThreadTeam.cpp index ff8a9759ba7..2e5587e4a34 100644 --- a/core/src/impl/Kokkos_HostThreadTeam.cpp +++ b/core/src/impl/Kokkos_HostThreadTeam.cpp @@ -62,7 +62,8 @@ void HostThreadTeamData::organize_pool(HostThreadTeamData *members[], // Verify not already a member of a pool: for (int rank = 0; rank < size && ok; ++rank) { - ok = (nullptr != members[rank]) && (0 == members[rank]->m_pool_scratch); + ok = (nullptr != members[rank]) && + (nullptr == members[rank]->m_pool_scratch); } if (ok) { @@ -106,8 +107,8 @@ void HostThreadTeamData::organize_pool(HostThreadTeamData *members[], void HostThreadTeamData::disband_pool() { m_work_range.first = -1; m_work_range.second = -1; - m_pool_scratch = 0; - m_team_scratch = 0; + m_pool_scratch = nullptr; + m_team_scratch = nullptr; m_pool_rank = 0; m_pool_size = 1; m_team_base = 0; @@ -121,7 +122,7 @@ void HostThreadTeamData::disband_pool() { int HostThreadTeamData::organize_team(const int team_size) { // Pool is initialized - const bool ok_pool = 0 != m_pool_scratch; + const bool ok_pool = nullptr != m_pool_scratch; // Team is not set const bool ok_team = @@ -240,7 +241,7 @@ int HostThreadTeamData::get_work_stealing() noexcept { HostThreadTeamData *const *const pool = (HostThreadTeamData **)(m_pool_scratch + m_pool_members); - // Attempt from begining failed, try to steal from end of neighbor + // Attempt from beginning failed, try to steal from end of neighbor pair_int_t volatile *steal_range = &(pool[m_steal_rank]->m_work_range); diff --git a/core/src/impl/Kokkos_HostThreadTeam.hpp b/core/src/impl/Kokkos_HostThreadTeam.hpp index fce665f6b15..9e4dda941b8 100644 --- a/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -53,7 +53,8 @@ #include #include -#include // std::numeric_limits +#include // std::numeric_limits +#include // std::max //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -188,9 +189,9 @@ class HostThreadTeamData { constexpr HostThreadTeamData() noexcept : m_work_range(-1, -1), m_work_end(0), - m_scratch(0), - m_pool_scratch(0), - m_team_scratch(0), + m_scratch(nullptr), + m_pool_scratch(nullptr), + m_team_scratch(nullptr), m_pool_rank(0), m_pool_size(1), m_team_reduce(0), @@ -538,6 +539,8 @@ class HostThreadTeamMember { } #else { + (void)value; + (void)source_team_rank; Kokkos::abort("HostThreadTeamMember team_broadcast\n"); } #endif @@ -574,6 +577,9 @@ class HostThreadTeamMember { } #else { + (void)f; + (void)value; + (void)source_team_rank; Kokkos::abort("HostThreadTeamMember team_broadcast\n"); } #endif @@ -640,6 +646,8 @@ class HostThreadTeamMember { } #else { + (void)reducer; + (void)contribution; Kokkos::abort("HostThreadTeamMember team_reduce\n"); } #endif @@ -693,8 +701,8 @@ class HostThreadTeamMember { #endif*/ template - KOKKOS_INLINE_FUNCTION T team_scan(T const& value, T* const global = 0) const - noexcept + KOKKOS_INLINE_FUNCTION T team_scan(T const& value, + T* const global = nullptr) const noexcept #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) { if (0 != m_data.m_team_rank) { @@ -751,6 +759,8 @@ class HostThreadTeamMember { } #else { + (void)value; + (void)global; Kokkos::abort("HostThreadTeamMember team_scan\n"); return T(); } diff --git a/core/src/impl/Kokkos_LIFO.hpp b/core/src/impl/Kokkos_LIFO.hpp index 67e9af5c402..683c5c9b18b 100644 --- a/core/src/impl/Kokkos_LIFO.hpp +++ b/core/src/impl/Kokkos_LIFO.hpp @@ -48,8 +48,7 @@ #define KOKKOS_IMPL_LIFO_HPP #include -#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using - // CUDA +#ifdef KOKKOS_ENABLE_TASKDAG #include @@ -305,7 +304,7 @@ class SingleConsumeOperationLIFO : private LockBasedLIFOCommon { public: using value_type = typename base_t::value_type; // = T - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION SingleConsumeOperationLIFO() noexcept = default; SingleConsumeOperationLIFO(SingleConsumeOperationLIFO const&) = delete; @@ -314,7 +313,7 @@ class SingleConsumeOperationLIFO : private LockBasedLIFOCommon { delete; SingleConsumeOperationLIFO& operator=(SingleConsumeOperationLIFO&&) = delete; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION ~SingleConsumeOperationLIFO() = default; KOKKOS_INLINE_FUNCTION diff --git a/core/src/impl/Kokkos_LinkedListNode.hpp b/core/src/impl/Kokkos_LinkedListNode.hpp index 1b7918ed788..79aeca5da06 100644 --- a/core/src/impl/Kokkos_LinkedListNode.hpp +++ b/core/src/impl/Kokkos_LinkedListNode.hpp @@ -48,8 +48,7 @@ #define KOKKOS_IMPL_LINKEDLISTNODE_HPP #include -#ifdef KOKKOS_ENABLE_TASKDAG // Note: implies CUDA_VERSION >= 8000 if using - // CUDA +#ifdef KOKKOS_ENABLE_TASKDAG #include diff --git a/core/src/impl/Kokkos_MemoryPoolAllocator.hpp b/core/src/impl/Kokkos_MemoryPoolAllocator.hpp index 54f91eb68d0..2218405766c 100644 --- a/core/src/impl/Kokkos_MemoryPoolAllocator.hpp +++ b/core/src/impl/Kokkos_MemoryPoolAllocator.hpp @@ -65,17 +65,17 @@ class MemoryPoolAllocator { memory_pool m_pool; public: - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION MemoryPoolAllocator() = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION MemoryPoolAllocator(MemoryPoolAllocator const&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION MemoryPoolAllocator(MemoryPoolAllocator&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION MemoryPoolAllocator& operator=(MemoryPoolAllocator const&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION MemoryPoolAllocator& operator=(MemoryPoolAllocator&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION ~MemoryPoolAllocator() = default; KOKKOS_INLINE_FUNCTION diff --git a/core/src/impl/Kokkos_Memory_Fence.hpp b/core/src/impl/Kokkos_Memory_Fence.hpp index f8bef72b77f..eae14a92d58 100644 --- a/core/src/impl/Kokkos_Memory_Fence.hpp +++ b/core/src/impl/Kokkos_Memory_Fence.hpp @@ -55,6 +55,8 @@ void memory_fence() { __threadfence(); #elif defined(KOKKOS_ENABLE_ROCM_ATOMICS) amp_barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE); +#elif defined(KOKKOS_ENABLE_HIP_ATOMICS) + __threadfence(); #elif defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ENABLE_ISA_X86_64) asm volatile("mfence" ::: "memory"); #elif defined(KOKKOS_ENABLE_GNU_ATOMICS) || \ diff --git a/core/src/impl/Kokkos_MultipleTaskQueue.hpp b/core/src/impl/Kokkos_MultipleTaskQueue.hpp index 675547ba5e8..fe78cfbacc6 100644 --- a/core/src/impl/Kokkos_MultipleTaskQueue.hpp +++ b/core/src/impl/Kokkos_MultipleTaskQueue.hpp @@ -125,7 +125,7 @@ struct MultipleTaskQueueTeamEntry { KOKKOS_INLINE_FUNCTION OptionalRef _pop_failed_insertion( int priority, TaskType type, typename std::enable_if< - task_queue_traits::ready_queue_insertion_may_fail and + task_queue_traits::ready_queue_insertion_may_fail && std::is_void<_always_void>::value, void*>::type = nullptr) { auto* rv_ptr = m_failed_heads[priority][(int)type]; @@ -142,9 +142,9 @@ struct MultipleTaskQueueTeamEntry { template KOKKOS_INLINE_FUNCTION OptionalRef _pop_failed_insertion( - int priority, TaskType type, + int /*priority*/, TaskType /*type*/, typename std::enable_if< - not task_queue_traits::ready_queue_insertion_may_fail and + !task_queue_traits::ready_queue_insertion_may_fail && std::is_void<_always_void>::value, void*>::type = nullptr) { return OptionalRef{nullptr}; @@ -181,13 +181,13 @@ struct MultipleTaskQueueTeamEntry { auto return_value = OptionalRef{}; for (int i_priority = 0; i_priority < NumPriorities; ++i_priority) { return_value = _pop_failed_insertion(i_priority, TaskTeam); - if (not return_value) + if (!return_value) return_value = m_ready_queues[i_priority][TaskTeam].pop(); if (return_value) return return_value; // Check for a single task with this priority return_value = _pop_failed_insertion(i_priority, TaskSingle); - if (not return_value) + if (!return_value) return_value = m_ready_queues[i_priority][TaskSingle].pop(); if (return_value) return return_value; } @@ -203,7 +203,7 @@ struct MultipleTaskQueueTeamEntry { KOKKOS_INLINE_FUNCTION void do_handle_failed_insertion( runnable_task_base_type&& task, typename std::enable_if< - task_queue_traits::ready_queue_insertion_may_fail and + task_queue_traits::ready_queue_insertion_may_fail && std::is_void<_always_void>::value, void*>::type = nullptr) { // failed insertions, if they happen, must be from the only thread that @@ -217,9 +217,9 @@ struct MultipleTaskQueueTeamEntry { template KOKKOS_INLINE_FUNCTION void do_handle_failed_insertion( - runnable_task_base_type&& task, + runnable_task_base_type&& /*task*/, typename std::enable_if< - not task_queue_traits::ready_queue_insertion_may_fail and + !task_queue_traits::ready_queue_insertion_may_fail && std::is_void<_always_void>::value, void*>::type = nullptr) { Kokkos::abort("should be unreachable!"); @@ -229,7 +229,7 @@ struct MultipleTaskQueueTeamEntry { KOKKOS_INLINE_FUNCTION void flush_failed_insertions( int priority, int task_type, typename std::enable_if< - task_queue_traits::ready_queue_insertion_may_fail and + task_queue_traits::ready_queue_insertion_may_fail && std::is_void<_always_void>::value, // just to make this dependent // on template parameter int>::type = 0) { @@ -258,7 +258,7 @@ struct MultipleTaskQueueTeamEntry { KOKKOS_INLINE_FUNCTION void flush_failed_insertions( int, int, typename std::enable_if< - not task_queue_traits::ready_queue_insertion_may_fail and + !task_queue_traits::ready_queue_insertion_may_fail && std::is_void<_always_void>::value, // just to make this dependent // on template parameter int>::type = 0) {} @@ -335,22 +335,22 @@ class MultipleTaskQueue final constexpr explicit SchedulerInfo(team_queue_id_t association) noexcept : team_association(association) {} - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION SchedulerInfo() = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION SchedulerInfo(SchedulerInfo const&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION SchedulerInfo(SchedulerInfo&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION SchedulerInfo& operator=(SchedulerInfo const&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION SchedulerInfo& operator=(SchedulerInfo&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION ~SchedulerInfo() = default; }; @@ -438,7 +438,7 @@ class MultipleTaskQueue final return_value = team_queue_info.pop_ready_task(); - if (not return_value) { + if (!return_value) { // loop through the rest of the teams and try to steal for (auto isteal = (team_association + 1) % this->n_queues(); isteal != team_association; @@ -484,32 +484,32 @@ class MultipleTaskQueue final // Provide a sensible default that can be overridden KOKKOS_INLINE_FUNCTION void update_scheduling_info_from_completed_predecessor( - runnable_task_base_type& ready_task, - runnable_task_base_type const& predecessor) const { + runnable_task_base_type& /*ready_task*/, + runnable_task_base_type const& /*predecessor*/) const { // Do nothing; we're using the extra storage for the failure linked list } // Provide a sensible default that can be overridden KOKKOS_INLINE_FUNCTION void update_scheduling_info_from_completed_predecessor( - aggregate_task_type& aggregate, - runnable_task_base_type const& predecessor) const { + aggregate_task_type& /*aggregate*/, + runnable_task_base_type const& /*predecessor*/) const { // Do nothing; we're using the extra storage for the failure linked list } // Provide a sensible default that can be overridden KOKKOS_INLINE_FUNCTION void update_scheduling_info_from_completed_predecessor( - aggregate_task_type& aggregate, - aggregate_task_type const& predecessor) const { + aggregate_task_type& /*aggregate*/, + aggregate_task_type const& /*predecessor*/) const { // Do nothing; we're using the extra storage for the failure linked list } // Provide a sensible default that can be overridden KOKKOS_INLINE_FUNCTION void update_scheduling_info_from_completed_predecessor( - runnable_task_base_type& ready_task, - aggregate_task_type const& predecessor) const { + runnable_task_base_type& /*ready_task*/, + aggregate_task_type const& /*predecessor*/) const { // Do nothing; we're using the extra storage for the failure linked list } diff --git a/core/src/impl/Kokkos_OldMacros.hpp b/core/src/impl/Kokkos_OldMacros.hpp index 6b1e8b63cce..fbb921d7f26 100644 --- a/core/src/impl/Kokkos_OldMacros.hpp +++ b/core/src/impl/Kokkos_OldMacros.hpp @@ -245,12 +245,6 @@ #endif #endif -#ifdef KOKKOS_HAVE_QTHREADS -#ifndef KOKKOS_ENABLE_QTHREADS -#define KOKKOS_ENABLE_QTHREADS KOKKOS_HAVE_QTHREADS -#endif -#endif - #ifdef KOKKOS_HAVE_SERIAL #ifndef KOKKOS_ENABLE_SERIAL #define KOKKOS_ENABLE_SERIAL KOKKOS_HAVE_SERIAL @@ -474,10 +468,6 @@ #define KOKKOS_HAVE_PTHREAD 1 #endif -#if (!defined(KOKKOS_HAVE_QTHREADS)) && defined(KOKKOS_ENABLE_QTHREADS) -#define KOKKOS_HAVE_QTHREADS 1 -#endif - #if (!defined(KOKKOS_HAVE_SERIAL)) && defined(KOKKOS_ENABLE_SERIAL) #define KOKKOS_HAVE_SERIAL 1 #endif diff --git a/core/src/impl/Kokkos_OptionalRef.hpp b/core/src/impl/Kokkos_OptionalRef.hpp index 668e7c09161..12f6c9f5fdb 100644 --- a/core/src/impl/Kokkos_OptionalRef.hpp +++ b/core/src/impl/Kokkos_OptionalRef.hpp @@ -69,17 +69,22 @@ struct OptionalRef { public: using value_type = T; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION OptionalRef() = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION OptionalRef(OptionalRef const&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION OptionalRef(OptionalRef&&) = default; KOKKOS_INLINE_FUNCTION - OptionalRef& operator=(OptionalRef const&) = default; + // MSVC requires that this copy constructor is not defaulted + // if there exists a (non-defaulted) volatile one. + OptionalRef& operator=(OptionalRef const& other) noexcept { + m_value = other.m_value; + return *this; + } KOKKOS_INLINE_FUNCTION // Can't return a reference to volatile OptionalRef, since GCC issues a @@ -88,10 +93,10 @@ struct OptionalRef { m_value = other.m_value; } - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION OptionalRef& operator=(OptionalRef&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION ~OptionalRef() = default; KOKKOS_INLINE_FUNCTION diff --git a/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp b/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp index e35ff84e978..51d1446ef5c 100644 --- a/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp +++ b/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp @@ -1,45 +1,45 @@ /* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 + //@HEADER + // ************************************************************************ + // + // Kokkos v. 3.0 // Copyright (2020) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER + // + // Under the terms of Contract DE-NA0003525 with NTESS, + // the U.S. Government retains certain rights in this software. + // + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + // + // 1. Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimer. + // + // 2. Redistributions in binary form must reproduce the above copyright + // notice, this list of conditions and the following disclaimer in the + // documentation and/or other materials provided with the distribution. + // + // 3. Neither the name of the Corporation nor the names of the + // contributors may be used to endorse or promote products derived from + // this software without specific prior written permission. + // + // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY + // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE + // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // + // Questions? Contact Christian R. Trott (crtrott@sandia.gov) + // + // ************************************************************************ + //@HEADER */ #ifndef KOKKOSP_DEVICE_INFO_HPP diff --git a/core/src/impl/Kokkos_Profiling_Interface.cpp b/core/src/impl/Kokkos_Profiling_Interface.cpp index a069a695fc4..cf52caea905 100644 --- a/core/src/impl/Kokkos_Profiling_Interface.cpp +++ b/core/src/impl/Kokkos_Profiling_Interface.cpp @@ -1,46 +1,46 @@ /* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 + //@HEADER + // ************************************************************************ + // + // Kokkos v. 3.0 // Copyright (2020) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ + // + // Under the terms of Contract DE-NA0003525 with NTESS, + // the U.S. Government retains certain rights in this software. + // + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + // + // 1. Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimer. + // + // 2. Redistributions in binary form must reproduce the above copyright + // notice, this list of conditions and the following disclaimer in the + // documentation and/or other materials provided with the distribution. + // + // 3. Neither the name of the Corporation nor the names of the + // contributors may be used to endorse or promote products derived from + // this software without specific prior written permission. + // + // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY + // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE + // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // + // Questions? Contact Christian R. Trott (crtrott@sandia.gov) + // + // ************************************************************************ + //@HEADER + */ #include diff --git a/core/src/impl/Kokkos_Profiling_Interface.hpp b/core/src/impl/Kokkos_Profiling_Interface.hpp index ac37401e108..df17501ff40 100644 --- a/core/src/impl/Kokkos_Profiling_Interface.hpp +++ b/core/src/impl/Kokkos_Profiling_Interface.hpp @@ -1,46 +1,46 @@ /* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 + //@HEADER + // ************************************************************************ + // + // Kokkos v. 3.0 // Copyright (2020) National Technology & Engineering // Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ + // + // Under the terms of Contract DE-NA0003525 with NTESS, + // the U.S. Government retains certain rights in this software. + // + // Redistribution and use in source and binary forms, with or without + // modification, are permitted provided that the following conditions are + // met: + // + // 1. Redistributions of source code must retain the above copyright + // notice, this list of conditions and the following disclaimer. + // + // 2. Redistributions in binary form must reproduce the above copyright + // notice, this list of conditions and the following disclaimer in the + // documentation and/or other materials provided with the distribution. + // + // 3. Neither the name of the Corporation nor the names of the + // contributors may be used to endorse or promote products derived from + // this software without specific prior written permission. + // + // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY + // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE + // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + // + // Questions? Contact Christian R. Trott (crtrott@sandia.gov) + // + // ************************************************************************ + //@HEADER + */ #ifndef KOKKOSP_INTERFACE_HPP #define KOKKOSP_INTERFACE_HPP @@ -54,7 +54,37 @@ #include #include +// NOTE: in this Kokkos::Profiling block, do not define anything that shouldn't +// exist should Profiling be disabled + +namespace Kokkos { +namespace Profiling { +namespace Experimental { +enum struct DeviceType { + Serial, + OpenMP, + Cuda, + HIP, + OpenMPTarget, + HPX, + Threads +}; +template +struct DeviceTypeTraits; + +constexpr const size_t device_type_bits = 8; +constexpr const size_t instance_bits = 24; +template +inline uint32_t device_id(ExecutionSpace const& space) noexcept { + auto device_id = static_cast(DeviceTypeTraits::id); + return (device_id << instance_bits) + space.impl_instance_id(); +} +} // namespace Experimental +} // namespace Profiling +} // end namespace Kokkos + #if defined(KOKKOS_ENABLE_PROFILING) +// We check at configure time that libdl is available. #include #include diff --git a/core/src/impl/Kokkos_Serial.cpp b/core/src/impl/Kokkos_Serial.cpp index 40aeaa1b7a9..b39f9dfeea9 100644 --- a/core/src/impl/Kokkos_Serial.cpp +++ b/core/src/impl/Kokkos_Serial.cpp @@ -192,7 +192,7 @@ void Serial::impl_finalize() space.deallocate(Impl::g_serial_thread_team_data.scratch_buffer(), Impl::g_serial_thread_team_data.scratch_bytes()); - Impl::g_serial_thread_team_data.scratch_assign((void*)0, 0, 0, 0, 0, 0); + Impl::g_serial_thread_team_data.scratch_assign(nullptr, 0, 0, 0, 0, 0); } #if defined(KOKKOS_ENABLE_PROFILING) diff --git a/core/src/impl/Kokkos_Serial_Task.hpp b/core/src/impl/Kokkos_Serial_Task.hpp index 6871e4f14a4..3ac3899acaf 100644 --- a/core/src/impl/Kokkos_Serial_Task.hpp +++ b/core/src/impl/Kokkos_Serial_Task.hpp @@ -92,7 +92,7 @@ class TaskQueueSpecialization > { auto current_task = OptionalRef(nullptr); - while (not queue.is_done()) { + while (!queue.is_done()) { // Each team lead attempts to acquire either a thread team task // or a single thread task for the team. diff --git a/core/src/impl/Kokkos_SharedAlloc.cpp b/core/src/impl/Kokkos_SharedAlloc.cpp index 3688f069e56..6a054f73a19 100644 --- a/core/src/impl/Kokkos_SharedAlloc.cpp +++ b/core/src/impl/Kokkos_SharedAlloc.cpp @@ -47,7 +47,8 @@ namespace Kokkos { namespace Impl { -__thread int SharedAllocationRecord::t_tracking_enabled = 1; +KOKKOS_THREAD_LOCAL int SharedAllocationRecord::t_tracking_enabled = + 1; #ifdef KOKKOS_DEBUG bool SharedAllocationRecord::is_sane( @@ -100,7 +101,7 @@ bool SharedAllocationRecord::is_sane( reinterpret_cast(rec->m_next), reinterpret_cast(rec->m_prev), reinterpret_cast( - rec->m_next != NULL ? rec->m_next->m_prev : NULL), + rec->m_next != nullptr ? rec->m_next->m_prev : nullptr), reinterpret_cast(rec->m_prev != rec->m_root ? rec->m_prev->m_next : root_next)); @@ -186,7 +187,7 @@ SharedAllocationRecord::SharedAllocationRecord( #endif , m_count(0) { - if (0 != arg_alloc_ptr) { + if (nullptr != arg_alloc_ptr) { #ifdef KOKKOS_DEBUG // Insert into the root double-linked list for tracking // @@ -197,7 +198,7 @@ SharedAllocationRecord::SharedAllocationRecord( m_prev = m_root; static constexpr SharedAllocationRecord* zero = nullptr; - // Read root->m_next and lock by setting to NULL + // Read root->m_next and lock by setting to nullptr while ((m_next = Kokkos::atomic_exchange(&m_root->m_next, zero)) == nullptr) ; @@ -214,7 +215,7 @@ SharedAllocationRecord::SharedAllocationRecord( } else { Kokkos::Impl::throw_runtime_exception( - "Kokkos::Impl::SharedAllocationRecord given NULL allocation"); + "Kokkos::Impl::SharedAllocationRecord given nullptr allocation"); } } @@ -288,7 +289,7 @@ SharedAllocationRecord* SharedAllocationRecord< function_type d = arg_record->m_dealloc; (*d)(arg_record); - arg_record = 0; + arg_record = nullptr; } else if (old_count < 1) { // Error fprintf(stderr, "Kokkos::Impl::SharedAllocationRecord '%s' failed decrement count " diff --git a/core/src/impl/Kokkos_SharedAlloc.hpp b/core/src/impl/Kokkos_SharedAlloc.hpp index 75eefc0df05..6e954e8f271 100644 --- a/core/src/impl/Kokkos_SharedAlloc.hpp +++ b/core/src/impl/Kokkos_SharedAlloc.hpp @@ -115,7 +115,7 @@ class SharedAllocationRecord { SharedAllocationHeader* arg_alloc_ptr, size_t arg_alloc_size, function_type arg_dealloc); private: - static __thread int t_tracking_enabled; + static KOKKOS_THREAD_LOCAL int t_tracking_enabled; public: virtual std::string get_label() const { return std::string("Unmanaged"); } @@ -132,7 +132,7 @@ class SharedAllocationRecord { */ static void tracking_enable() { t_tracking_enabled = 1; } - virtual ~SharedAllocationRecord() {} + virtual ~SharedAllocationRecord() = default; SharedAllocationRecord() : m_alloc_ptr(nullptr), @@ -245,6 +245,9 @@ class SharedAllocationRecord #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) return new SharedAllocationRecord(arg_space, arg_label, arg_alloc); #else + (void)arg_space; + (void)arg_label; + (void)arg_alloc; return (SharedAllocationRecord*)0; #endif } @@ -267,7 +270,7 @@ union SharedAllocationTracker { public: // Use macros instead of inline functions to reduce // pressure on compiler optimization by reducing - // number of symbols and inline functons. + // number of symbols and inline functions. #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) diff --git a/core/src/impl/Kokkos_SimpleTaskScheduler.hpp b/core/src/impl/Kokkos_SimpleTaskScheduler.hpp index f01bdce17b5..a01b22e4e9b 100644 --- a/core/src/impl/Kokkos_SimpleTaskScheduler.hpp +++ b/core/src/impl/Kokkos_SimpleTaskScheduler.hpp @@ -226,13 +226,13 @@ class SimpleTaskScheduler } template - KOKKOS_FUNCTION - future_type_for_functor::type> - _spawn_impl( - DepTaskType arg_predecessor_task, TaskPriority arg_priority, - typename runnable_task_base_type::function_type apply_function_ptr, - typename runnable_task_base_type::destroy_type destroy_function_ptr, - FunctorType&& functor) { + KOKKOS_FUNCTION future_type_for_functor< + typename std::decay::type> + _spawn_impl( + DepTaskType arg_predecessor_task, TaskPriority arg_priority, + typename runnable_task_base_type::function_type apply_function_ptr, + typename runnable_task_base_type::destroy_type /*destroy_function_ptr*/, + FunctorType&& functor) { KOKKOS_EXPECTS(m_queue != nullptr); using functor_future_type = @@ -445,7 +445,7 @@ class SimpleTaskScheduler KOKKOS_EXPECTS(!task.get_respawn_flag()); task.set_priority(priority); - KOKKOS_ASSERT(not task.has_predecessor()); + KOKKOS_ASSERT(!task.has_predecessor()); task.set_respawn_flag(true); } diff --git a/core/src/impl/Kokkos_SingleTaskQueue.hpp b/core/src/impl/Kokkos_SingleTaskQueue.hpp index 8c149e978f7..a0eccffb627 100644 --- a/core/src/impl/Kokkos_SingleTaskQueue.hpp +++ b/core/src/impl/Kokkos_SingleTaskQueue.hpp @@ -154,7 +154,7 @@ class SingleTaskQueue KOKKOS_FUNCTION OptionalRef pop_ready_task( - team_scheduler_info_type const& info) { + team_scheduler_info_type const& /*info*/) { OptionalRef return_value; // always loop in order of priority first, then prefer team tasks over // single tasks diff --git a/core/src/impl/Kokkos_Stacktrace.cpp b/core/src/impl/Kokkos_Stacktrace.cpp index 582f6979363..c0c1fdf6be7 100644 --- a/core/src/impl/Kokkos_Stacktrace.cpp +++ b/core/src/impl/Kokkos_Stacktrace.cpp @@ -18,7 +18,7 @@ namespace Kokkos { namespace Impl { #ifndef KOKKOS_IMPL_ENABLE_STACKTRACE int backtrace(void**, int) { return 0; } -char** backtrace_symbols(void* const*, int) { return NULL; } +char** backtrace_symbols(void* const*, int) { return nullptr; } #endif std::string demangle(const std::string& name) { diff --git a/core/src/impl/Kokkos_TaskBase.hpp b/core/src/impl/Kokkos_TaskBase.hpp index e3de5d0eb9a..8078c68dbd1 100644 --- a/core/src/impl/Kokkos_TaskBase.hpp +++ b/core/src/impl/Kokkos_TaskBase.hpp @@ -167,11 +167,7 @@ class TaskBase { TaskBase& operator=(TaskBase&&) = delete; TaskBase& operator=(const TaskBase&) = delete; -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - KOKKOS_INLINE_FUNCTION ~TaskBase(){}; -#else - KOKKOS_INLINE_FUNCTION ~TaskBase() = default; -#endif + KOKKOS_DEFAULTED_FUNCTION ~TaskBase() = default; KOKKOS_INLINE_FUNCTION constexpr TaskBase() : m_apply(nullptr), @@ -211,7 +207,7 @@ class TaskBase { Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); } - if (0 != dep) { + if (nullptr != dep) { // The future may be destroyed upon returning from this call // so increment reference count to track this assignment. Kokkos::atomic_increment(&(dep->m_ref_count)); @@ -226,7 +222,44 @@ class TaskBase { } }; -static_assert(sizeof(TaskBase) == 48, "Verifying expected sizeof(TaskBase)"); +//------------------------------------------------------------------------------ +// {{{2 + +// Workaround: some compilers implement int16_t as 4 bytes, so the size might +// not actually be 48 bytes. +// There's not a lot of reason to keep checking this here; the program will +// work fine if this isn't true. I think this check was originally here to +// emphasize the fact that adding to the size of TaskBase could have a +// significant performance penalty, since doing so could substantially decrease +// the number of full task types that fit into a cache line. We'll leave it +// here for now, though, since we're probably going to be ripping all of the +// old TaskBase stuff out eventually anyway. +constexpr size_t unpadded_task_base_size = 44 + 2 * sizeof(int16_t); +// don't forget padding: +constexpr size_t task_base_misalignment = + unpadded_task_base_size % alignof(void*); +constexpr size_t task_base_padding_size = + (alignof(void*) - task_base_misalignment) % alignof(void*); +constexpr size_t expected_task_base_size = + unpadded_task_base_size + task_base_padding_size; + +// Produce a more readable compiler error message than the plain static assert +template +struct verify_task_base_size_is_48_note_actual_size_is_ {}; +template <> +struct verify_task_base_size_is_48_note_actual_size_is_< + expected_task_base_size> { + using type = int; +}; +static constexpr + typename verify_task_base_size_is_48_note_actual_size_is_::type verify = {}; + +static_assert(sizeof(TaskBase) == expected_task_base_size, + "Verifying expected sizeof(TaskBase)"); + +// end Verify the size of TaskBase is as expected }}}2 +//------------------------------------------------------------------------------ } /* namespace Impl */ } /* namespace Kokkos */ diff --git a/core/src/impl/Kokkos_TaskNode.hpp b/core/src/impl/Kokkos_TaskNode.hpp index c2085d236ed..42afa93cdcc 100644 --- a/core/src/impl/Kokkos_TaskNode.hpp +++ b/core/src/impl/Kokkos_TaskNode.hpp @@ -308,7 +308,7 @@ class TaskNode template KOKKOS_INLINE_FUNCTION void consume_wait_queue(Function&& f) { - KOKKOS_EXPECTS(not m_wait_queue.is_consumed()); + KOKKOS_EXPECTS(!m_wait_queue.is_consumed()); m_wait_queue.consume(std::forward(f)); } @@ -499,7 +499,7 @@ class RunnableTaskBase void acquire_predecessor_from(runnable_task_type& other) { KOKKOS_EXPECTS(m_predecessor == nullptr || other.m_predecessor == m_predecessor); - // since we're transfering, no need to modify the reference count + // since we're transferring, no need to modify the reference count m_predecessor = other.m_predecessor; other.m_predecessor = nullptr; } @@ -508,7 +508,7 @@ class RunnableTaskBase void acquire_predecessor_from(runnable_task_type& other) volatile { KOKKOS_EXPECTS(m_predecessor == nullptr || other.m_predecessor == m_predecessor); - // since we're transfering, no need to modify the reference count + // since we're transferring, no need to modify the reference count m_predecessor = other.m_predecessor; other.m_predecessor = nullptr; } @@ -620,7 +620,7 @@ class alignas(16) RunnableTask ~RunnableTask() = delete; KOKKOS_INLINE_FUNCTION - void update_scheduling_info(member_type& member) { + void update_scheduling_info(member_type& /*member*/) { // TODO @tasking @generalization DSH call a queue-specific hook here; for // now, this info is already updated elsewhere this->scheduling_info() = // member.scheduler().scheduling_info(); @@ -639,7 +639,7 @@ class alignas(16) RunnableTask this->functor_type::operator()(*member, *val); } - KOKKOS_FUNCTION static void destroy(task_base_type* root) { + KOKKOS_FUNCTION static void destroy(task_base_type* /*root*/) { // TaskResult::destroy(root); } diff --git a/core/src/impl/Kokkos_TaskPolicyData.hpp b/core/src/impl/Kokkos_TaskPolicyData.hpp index c5b8c2fc686..09113628a76 100644 --- a/core/src/impl/Kokkos_TaskPolicyData.hpp +++ b/core/src/impl/Kokkos_TaskPolicyData.hpp @@ -75,20 +75,20 @@ struct TaskPolicyWithPredecessor { TaskPolicyWithPredecessor() = delete; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskPolicyWithPredecessor(TaskPolicyWithPredecessor const&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskPolicyWithPredecessor(TaskPolicyWithPredecessor&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskPolicyWithPredecessor& operator=(TaskPolicyWithPredecessor const&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskPolicyWithPredecessor& operator=(TaskPolicyWithPredecessor&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION ~TaskPolicyWithPredecessor() = default; KOKKOS_INLINE_FUNCTION @@ -129,19 +129,19 @@ struct TaskPolicyWithScheduler { TaskPolicyWithScheduler() = delete; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskPolicyWithScheduler(TaskPolicyWithScheduler const&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskPolicyWithScheduler(TaskPolicyWithScheduler&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskPolicyWithScheduler& operator=(TaskPolicyWithScheduler const&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskPolicyWithScheduler& operator=(TaskPolicyWithScheduler&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION ~TaskPolicyWithScheduler() = default; KOKKOS_INLINE_FUNCTION @@ -155,7 +155,7 @@ struct TaskPolicyWithScheduler { KOKKOS_INLINE_FUNCTION static constexpr bool has_predecessor() noexcept { - return not std::is_same::value; + return !std::is_same::value; } KOKKOS_INLINE_FUNCTION diff --git a/core/src/impl/Kokkos_TaskQueue.hpp b/core/src/impl/Kokkos_TaskQueue.hpp index f7787dae0cf..c0d2eca9c10 100644 --- a/core/src/impl/Kokkos_TaskQueue.hpp +++ b/core/src/impl/Kokkos_TaskQueue.hpp @@ -168,13 +168,13 @@ class TaskQueue : public TaskQueueBase { int allocation_count() const noexcept { return m_count_alloc; } KOKKOS_INLINE_FUNCTION - void initialize_team_queues(int pool_size) const noexcept {} + void initialize_team_queues(int /*pool_size*/) const noexcept {} KOKKOS_INLINE_FUNCTION task_root_type* attempt_to_steal_task() const noexcept { return nullptr; } KOKKOS_INLINE_FUNCTION - team_queue_type& get_team_queue(int team_rank) { return *this; } + team_queue_type& get_team_queue(int /*team_rank*/) { return *this; } // void execute() { specialization::execute( this ); } diff --git a/core/src/impl/Kokkos_TaskQueueCommon.hpp b/core/src/impl/Kokkos_TaskQueueCommon.hpp index b3444420d61..cd53a81b1b8 100644 --- a/core/src/impl/Kokkos_TaskQueueCommon.hpp +++ b/core/src/impl/Kokkos_TaskQueueCommon.hpp @@ -259,9 +259,9 @@ class TaskQueueCommonMixin { // we've lost exclusive access and should nt touch task again // If the predecessor is not done, then task is not ready - task_is_ready = not predecessor_not_ready; + task_is_ready = !predecessor_not_ready; - if (task_is_ready and predecessor.is_runnable()) { + if (task_is_ready && predecessor.is_runnable()) { // this is our last chance to update the scheduling info before // predecessor is potentially deleted _self().update_scheduling_info_from_completed_predecessor( @@ -299,7 +299,7 @@ class TaskQueueCommonMixin { // and enqueue the task // (can't move because the task isn't expired unless the push succeeds bool push_success = ready_queue.push(task); - if (not push_success) { + if (!push_success) { _self().handle_failed_ready_queue_insertion(std::move(task), ready_queue, info); } @@ -312,8 +312,8 @@ class TaskQueueCommonMixin { template KOKKOS_INLINE_FUNCTION void handle_failed_ready_queue_insertion( - RunnableTaskBase&& task, ReadyQueueType& ready_queue, - TeamSchedulerInfo const& info) { + RunnableTaskBase&& /*task*/, + ReadyQueueType& /*ready_queue*/, TeamSchedulerInfo const& /*info*/) { Kokkos::abort("Unhandled failure of ready task queue insertion!\n"); } @@ -325,7 +325,7 @@ class TaskQueueCommonMixin { AggregateTask&& aggregate, TeamSchedulerInfo const& info) { // Because the aggregate is being scheduled, should not be in any queue - KOKKOS_EXPECTS(not aggregate.is_enqueued()); + KOKKOS_EXPECTS(!aggregate.is_enqueued()); using task_scheduling_info_type = typename Derived::task_scheduling_info_type; @@ -369,7 +369,7 @@ class TaskQueueCommonMixin { // ready yet incomplete_dependence_found = pred_not_ready; - if (not pred_not_ready) { + if (!pred_not_ready) { // A predecessor was done, and we didn't enqueue the aggregate // Update the aggregate's scheduling info (we still have exclusive // access to it here) @@ -403,7 +403,7 @@ class TaskQueueCommonMixin { // dependence was found, because some other thread could have already popped // it off of another waiting queue - if (not incomplete_dependence_found) { + if (!incomplete_dependence_found) { // all of the predecessors were completed, so we can complete `task` _self().complete(std::move(aggregate), info); } @@ -462,15 +462,16 @@ class TaskQueueCommonMixin { template KOKKOS_INLINE_FUNCTION void initialize_scheduling_info_from_predecessor( - TaskNode& task, - TaskNode& predecessor) const { + TaskNode& /*task*/, + TaskNode& /*predecessor*/) const { /* do nothing by default */ } template KOKKOS_INLINE_FUNCTION void initialize_scheduling_info_from_team_scheduler_info( - TaskNode& task, TeamSchedulerInfo const& info) const { + TaskNode& /*task*/, + TeamSchedulerInfo const& /*info*/) const { /* do nothing by default */ } diff --git a/core/src/impl/Kokkos_TaskQueue_impl.hpp b/core/src/impl/Kokkos_TaskQueue_impl.hpp index 6acd40fc4bd..fe3cac7bda6 100644 --- a/core/src/impl/Kokkos_TaskQueue_impl.hpp +++ b/core/src/impl/Kokkos_TaskQueue_impl.hpp @@ -181,7 +181,7 @@ KOKKOS_FUNCTION bool TaskQueue::push_task( task->m_priority, task->m_ref_count); #endif - task_root_type *const zero = (task_root_type *)0; + task_root_type *const zero = nullptr; task_root_type *const lock = (task_root_type *)task_root_type::LockTag; task_root_type *volatile &next = task->m_next; @@ -254,7 +254,7 @@ TaskQueue::pop_ready_task( // // If queue is locked then just read by guaranteeing the CAS will fail. - if (lock == task) task = 0; + if (lock == task) task = nullptr; task_root_type *const x = task; @@ -334,7 +334,7 @@ KOKKOS_FUNCTION void TaskQueue::schedule_runnable( task->m_priority, task->m_ref_count); #endif - task_root_type *const zero = (task_root_type *)0; + task_root_type *const zero = nullptr; task_root_type *const lock = (task_root_type *)task_root_type::LockTag; task_root_type *const end = (task_root_type *)task_root_type::EndTag; @@ -382,16 +382,16 @@ KOKKOS_FUNCTION void TaskQueue::schedule_runnable( // If we don't have a dependency, or if pushing onto the wait queue of that // dependency failed (since the only time that queue should be locked is when // the task is transitioning to complete??!?) - const bool is_ready = (0 == dep) || (!push_task(&dep->m_wait, task)); + const bool is_ready = (nullptr == dep) || (!push_task(&dep->m_wait, task)); - if ((0 != dep) && respawn) { + if ((nullptr != dep) && respawn) { // Reference count for dep was incremented when // respawn assigned dependency to task->m_next // so that if dep completed prior to the // above push_task dep would not be destroyed. // dep reference count can now be decremented, // which may deallocate the task. - TaskQueue::assign(&dep, (task_root_type *)0); + TaskQueue::assign(&dep, nullptr); } if (is_ready) { @@ -452,7 +452,7 @@ KOKKOS_FUNCTION void TaskQueue::schedule_aggregate( task->m_ref_count); #endif - task_root_type *const zero = (task_root_type *)0; + task_root_type *const zero = nullptr; task_root_type *const lock = (task_root_type *)task_root_type::LockTag; task_root_type *const end = (task_root_type *)task_root_type::EndTag; @@ -551,7 +551,7 @@ KOKKOS_FUNCTION void TaskQueue::reschedule( // task is in Executing-Respawn state // task->m_next == 0 (no dependence) - task_root_type *const zero = (task_root_type *)0; + task_root_type *const zero = nullptr; task_root_type *const lock = (task_root_type *)task_root_type::LockTag; if (lock != Kokkos::atomic_exchange(&task->m_next, zero)) { @@ -567,7 +567,7 @@ KOKKOS_FUNCTION void TaskQueue::complete( // Complete a runnable task that has finished executing // or a when_all task when all of its dependeneces are complete. - task_root_type *const zero = (task_root_type *)0; + task_root_type *const zero = nullptr; task_root_type *const lock = (task_root_type *)task_root_type::LockTag; task_root_type *const end = (task_root_type *)task_root_type::EndTag; @@ -597,7 +597,7 @@ KOKKOS_FUNCTION void TaskQueue::complete( // If 'task' is an aggregate then any of the runnable tasks that // it depends upon may be attempting to complete this 'task'. // Must only transition a task once to complete status. - // This is controled by atomically locking the wait queue. + // This is controlled by atomically locking the wait queue. // Stop other tasks from adding themselves to this task's wait queue // by locking the head of this task's wait queue. @@ -624,7 +624,7 @@ KOKKOS_FUNCTION void TaskQueue::complete( task_root_type volatile &vx = *x; task_root_type *const next = vx.m_next; - vx.m_next = 0; + vx.m_next = nullptr; Kokkos::memory_fence(); diff --git a/core/src/impl/Kokkos_TaskResult.hpp b/core/src/impl/Kokkos_TaskResult.hpp index f846ff4ea62..40a9c3bf57c 100644 --- a/core/src/impl/Kokkos_TaskResult.hpp +++ b/core/src/impl/Kokkos_TaskResult.hpp @@ -113,20 +113,20 @@ struct TaskResult { using reference_type = void; template - KOKKOS_INLINE_FUNCTION static void* ptr(TaskNode* task) { + KOKKOS_INLINE_FUNCTION static void* ptr(TaskNode* /*task*/) { return nullptr; } - KOKKOS_INLINE_FUNCTION static void* ptr(TaskBase*) { return (void*)nullptr; } + KOKKOS_INLINE_FUNCTION static void* ptr(TaskBase*) { return nullptr; } template KOKKOS_INLINE_FUNCTION static reference_type get( - TaskNode* task) { /* Should never be called */ + TaskNode* /*task*/) { /* Should never be called */ } KOKKOS_INLINE_FUNCTION static reference_type get(TaskBase*) {} - KOKKOS_INLINE_FUNCTION static void destroy(TaskBase* task) {} + KOKKOS_INLINE_FUNCTION static void destroy(TaskBase* /*task*/) {} // template // KOKKOS_INLINE_FUNCTION static diff --git a/core/src/impl/Kokkos_TaskTeamMember.hpp b/core/src/impl/Kokkos_TaskTeamMember.hpp index 5a9d4c77bf6..2faab579490 100644 --- a/core/src/impl/Kokkos_TaskTeamMember.hpp +++ b/core/src/impl/Kokkos_TaskTeamMember.hpp @@ -90,22 +90,22 @@ class TaskTeamMemberAdapter : public TeamMember { // (rule of 6 constructors) - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskTeamMemberAdapter() = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskTeamMemberAdapter(TaskTeamMemberAdapter const&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskTeamMemberAdapter(TaskTeamMemberAdapter&&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskTeamMemberAdapter& operator=(TaskTeamMemberAdapter const&) = default; - KOKKOS_INLINE_FUNCTION + KOKKOS_DEFAULTED_FUNCTION TaskTeamMemberAdapter& operator=(TaskTeamMemberAdapter&&) = default; - KOKKOS_INLINE_FUNCTION ~TaskTeamMemberAdapter() = default; + KOKKOS_DEFAULTED_FUNCTION ~TaskTeamMemberAdapter() = default; //---------------------------------------- diff --git a/core/src/impl/Kokkos_Traits.hpp b/core/src/impl/Kokkos_Traits.hpp index 3457725ec6c..32e78b7f5f6 100644 --- a/core/src/impl/Kokkos_Traits.hpp +++ b/core/src/impl/Kokkos_Traits.hpp @@ -129,7 +129,7 @@ struct are_integral { enum { value = // Accept std::is_integral OR std::is_enum as an integral value - // since a simple enum value is automically convertable to an + // since a simple enum value is automically convertible to an // integral value. (std::is_integral::value || std::is_enum::value) && are_integral::value @@ -137,132 +137,6 @@ struct are_integral { }; //---------------------------------------------------------------------------- -/* C++11 conformal compile-time type traits utilities. - * Prefer to use C++11 when portably available. - */ -//---------------------------------------------------------------------------- -// C++11 Helpers: - -template -struct integral_constant { - // Declaration of 'static const' causes an unresolved linker symbol in debug - // static const T value = v ; - enum { value = T(v) }; - typedef T value_type; - typedef integral_constant type; - KOKKOS_INLINE_FUNCTION operator T() { return v; } -}; - -typedef integral_constant false_type; -typedef integral_constant true_type; - -//---------------------------------------------------------------------------- -// C++11 Type relationships: - -template -struct is_same : public false_type {}; -template -struct is_same : public true_type {}; - -//---------------------------------------------------------------------------- -// C++11 Type properties: - -template -struct is_const : public false_type {}; -template -struct is_const : public true_type {}; -template -struct is_const : public true_type {}; - -template -struct is_array : public false_type {}; -template -struct is_array : public true_type {}; -template -struct is_array : public true_type {}; - -//---------------------------------------------------------------------------- -// C++11 Type transformations: - -template -struct remove_const { - typedef T type; -}; -template -struct remove_const { - typedef T type; -}; -template -struct remove_const { - typedef T& type; -}; - -template -struct add_const { - typedef const T type; -}; -template -struct add_const { - typedef const T& type; -}; -template -struct add_const { - typedef const T type; -}; -template -struct add_const { - typedef const T& type; -}; - -template -struct remove_reference { - typedef T type; -}; -template -struct remove_reference { - typedef T type; -}; -template -struct remove_reference { - typedef const T type; -}; - -template -struct remove_extent { - typedef T type; -}; -template -struct remove_extent { - typedef T type; -}; -template -struct remove_extent { - typedef T type; -}; - -//---------------------------------------------------------------------------- -// C++11 Other type generators: - -template -struct condition { - typedef F type; -}; - -template -struct condition { - typedef T type; -}; - -template -struct enable_if; - -template -struct enable_if { - typedef T type; -}; - -//---------------------------------------------------------------------------- - } // namespace Impl } // namespace Kokkos @@ -280,19 +154,6 @@ struct enable_if_type { typedef T type; }; -//---------------------------------------------------------------------------- - -template -struct bool_ : public integral_constant {}; - -template -struct unsigned_ : public integral_constant {}; - -template -struct int_ : public integral_constant {}; - -typedef bool_ true_; -typedef bool_ false_; //---------------------------------------------------------------------------- // if_ @@ -302,10 +163,10 @@ struct if_c { typedef FalseType type; - typedef typename remove_const::type>::type - value_type; + typedef typename std::remove_const< + typename std::remove_reference::type>::type value_type; - typedef typename add_const::type const_value_type; + typedef typename std::add_const::type const_value_type; static KOKKOS_INLINE_FUNCTION const_value_type& select(const_value_type& v) { return v; @@ -337,10 +198,10 @@ struct if_c { typedef TrueType type; - typedef typename remove_const::type>::type - value_type; + typedef typename std::remove_const< + typename std::remove_reference::type>::type value_type; - typedef typename add_const::type const_value_type; + typedef typename std::add_const::type const_value_type; static KOKKOS_INLINE_FUNCTION const_value_type& select(const_value_type& v) { return v; @@ -387,50 +248,26 @@ struct if_ : public if_c {}; //---------------------------------------------------------------------------- -// Allows aliased types: -template -struct is_integral - : public integral_constant< - bool, - (std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - - std::is_same::value || std::is_same::value || - std::is_same::value || std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value)> {}; -//---------------------------------------------------------------------------- - template -struct is_label : public false_type {}; +struct is_label : public std::false_type {}; template <> -struct is_label : public true_type {}; +struct is_label : public std::true_type {}; template <> -struct is_label : public true_type {}; +struct is_label : public std::true_type {}; template -struct is_label : public true_type {}; +struct is_label : public std::true_type {}; template -struct is_label : public true_type {}; +struct is_label : public std::true_type {}; template <> -struct is_label : public true_type {}; +struct is_label : public std::true_type {}; template <> -struct is_label : public true_type {}; +struct is_label : public std::true_type {}; // These 'constexpr'functions can be used as // both regular functions and meta-function. @@ -515,20 +352,6 @@ struct integral_nonzero_constant { //---------------------------------------------------------------------------- -template -struct is_integral_constant : public false_ { - typedef void integral_type; - enum { integral_value = 0 }; -}; - -template -struct is_integral_constant> : public true_ { - typedef T integral_type; - enum { integral_value = v }; -}; - -//---------------------------------------------------------------------------- - template class TypeList; diff --git a/core/src/impl/Kokkos_VLAEmulation.hpp b/core/src/impl/Kokkos_VLAEmulation.hpp index 27469328786..9b90864199c 100644 --- a/core/src/impl/Kokkos_VLAEmulation.hpp +++ b/core/src/impl/Kokkos_VLAEmulation.hpp @@ -185,7 +185,7 @@ struct ObjectWithVLAEmulation { // Note: We can't do this at class scope because it unnecessarily requires // vla_value_type to be a complete type - static_assert(not std::is_abstract::value, + static_assert(!std::is_abstract::value, "Can't use abstract type with VLA emulation"); KOKKOS_EXPECTS(num_entries >= 0); diff --git a/core/src/impl/Kokkos_ViewArray.hpp b/core/src/impl/Kokkos_ViewArray.hpp index 6123a608a7c..119ad4eccf8 100644 --- a/core/src/impl/Kokkos_ViewArray.hpp +++ b/core/src/impl/Kokkos_ViewArray.hpp @@ -338,7 +338,7 @@ class ViewMapping > { //---------------------------------------- - KOKKOS_INLINE_FUNCTION ~ViewMapping() {} + KOKKOS_DEFAULTED_FUNCTION ~ViewMapping() = default; KOKKOS_INLINE_FUNCTION ViewMapping() : m_impl_handle(), m_impl_offset(), m_stride(0) {} KOKKOS_INLINE_FUNCTION ViewMapping(const ViewMapping &rhs) @@ -349,7 +349,6 @@ class ViewMapping > { m_impl_handle = rhs.m_impl_handle; m_impl_offset = rhs.m_impl_offset; m_stride = rhs.m_stride; - ; return *this; } @@ -469,7 +468,7 @@ class ViewMapping< KOKKOS_INLINE_FUNCTION static void assign(DstType &dst, const SrcType &src, - const TrackType &src_track) { + const TrackType & /*src_track*/) { static_assert(is_assignable, "Can only convert to array_type"); typedef typename DstType::offset_type dst_offset_type; diff --git a/core/src/impl/Kokkos_ViewCtor.hpp b/core/src/impl/Kokkos_ViewCtor.hpp index 33c6e73f056..93a267ffa32 100644 --- a/core/src/impl/Kokkos_ViewCtor.hpp +++ b/core/src/impl/Kokkos_ViewCtor.hpp @@ -238,7 +238,10 @@ struct ViewCtorProp : public ViewCtorProp... { /* Copy from a matching property subset */ template ViewCtorProp(ViewCtorProp const &arg) - : ViewCtorProp(((ViewCtorProp const &)arg))... {} + : ViewCtorProp( + static_cast const &>(arg))... { + (void)arg; + } }; } /* namespace Impl */ diff --git a/core/src/impl/Kokkos_ViewLayoutTiled.hpp b/core/src/impl/Kokkos_ViewLayoutTiled.hpp index 2071f931e39..27f4375e566 100644 --- a/core/src/impl/Kokkos_ViewLayoutTiled.hpp +++ b/core/src/impl/Kokkos_ViewLayoutTiled.hpp @@ -601,39 +601,10 @@ struct ViewOffset< //---------------------------------------- -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - KOKKOS_INLINE_FUNCTION ~ViewOffset() {} - KOKKOS_INLINE_FUNCTION ViewOffset() {} - KOKKOS_INLINE_FUNCTION ViewOffset(const ViewOffset& rhs) - : m_dim(rhs.m_dim), - m_tile_N0(rhs.m_tile_N0), - m_tile_N1(rhs.m_tile_N1), - m_tile_N2(rhs.m_tile_N2), - m_tile_N3(rhs.m_tile_N3), - m_tile_N4(rhs.m_tile_N4), - m_tile_N5(rhs.m_tile_N5), - m_tile_N6(rhs.m_tile_N6), - m_tile_N7(rhs.m_tile_N7) {} - - KOKKOS_INLINE_FUNCTION ViewOffset& operator=(const ViewOffset& rhs) { - m_dim = rhs.m_dim; - m_tile_N0 = rhs.m_tile_N0; - m_tile_N1 = rhs.m_tile_N1; - m_tile_N2 = rhs.m_tile_N2; - m_tile_N3 = rhs.m_tile_N3; - m_tile_N4 = rhs.m_tile_N4; - m_tile_N5 = rhs.m_tile_N5; - m_tile_N6 = rhs.m_tile_N6; - m_tile_N7 = rhs.m_tile_N7; - return *this; - } - -#else - KOKKOS_INLINE_FUNCTION ~ViewOffset() = default; - KOKKOS_INLINE_FUNCTION ViewOffset() = default; - KOKKOS_INLINE_FUNCTION ViewOffset(const ViewOffset&) = default; - KOKKOS_INLINE_FUNCTION ViewOffset& operator=(const ViewOffset&) = default; -#endif + KOKKOS_DEFAULTED_FUNCTION ~ViewOffset() = default; + KOKKOS_DEFAULTED_FUNCTION ViewOffset() = default; + KOKKOS_DEFAULTED_FUNCTION ViewOffset(const ViewOffset&) = default; + KOKKOS_DEFAULTED_FUNCTION ViewOffset& operator=(const ViewOffset&) = default; template KOKKOS_INLINE_FUNCTION constexpr ViewOffset( diff --git a/core/src/impl/Kokkos_ViewMapping.hpp b/core/src/impl/Kokkos_ViewMapping.hpp index f6dfacb4d8b..a8dc1fb84a6 100644 --- a/core/src/impl/Kokkos_ViewMapping.hpp +++ b/core/src/impl/Kokkos_ViewMapping.hpp @@ -252,7 +252,7 @@ template struct ViewDimensionJoin; template -struct ViewDimensionJoin, ViewDimension > { +struct ViewDimensionJoin, ViewDimension> { typedef ViewDimension type; }; @@ -263,7 +263,7 @@ struct ViewDimensionAssignable; template struct ViewDimensionAssignable, - ViewDimension > { + ViewDimension> { typedef ViewDimension dst; typedef ViewDimension src; @@ -327,18 +327,18 @@ struct is_integral_extent_type { }; template -struct is_integral_extent_type > { +struct is_integral_extent_type> { enum { value = std::is_integral::value ? 1 : 0 }; }; template -struct is_integral_extent_type > { +struct is_integral_extent_type> { enum { value = std::is_integral::value ? 1 : 0 }; }; // Assuming '2 == initializer_list::size()' template -struct is_integral_extent_type > { +struct is_integral_extent_type> { enum { value = std::is_integral::value ? 1 : 0 }; }; @@ -641,10 +641,10 @@ struct SubviewExtents { error(buf + n, buf_len - n, domain_rank + 1, range_rank + 1, dim, args...); } +#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) template KOKKOS_FORCEINLINE_FUNCTION void error(const ViewDimension& dim, Args... args) const { -#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) enum { LEN = 1024 }; char buffer[LEN]; @@ -652,10 +652,14 @@ struct SubviewExtents { error(buffer + n, LEN - n, 0, 0, dim, args...); Kokkos::Impl::throw_runtime_exception(std::string(buffer)); + } #else + template + KOKKOS_FORCEINLINE_FUNCTION void error(const ViewDimension&, + Args...) const { Kokkos::abort("Kokkos::subview bounds error"); -#endif } +#endif #else @@ -726,18 +730,18 @@ template struct ViewDataType; template -struct ViewDataType > { +struct ViewDataType> { typedef T type; }; template -struct ViewDataType > { - typedef typename ViewDataType >::type type; +struct ViewDataType> { + typedef typename ViewDataType>::type type; }; template -struct ViewDataType > { - typedef typename ViewDataType >::type type[N]; +struct ViewDataType> { + typedef typename ViewDataType>::type type[N]; }; /**\brief Analysis of View data type. @@ -2740,7 +2744,7 @@ struct ViewValueFunctor; template struct ViewValueFunctor { - typedef Kokkos::RangePolicy PolicyType; + typedef Kokkos::RangePolicy> PolicyType; typedef typename ExecSpace::execution_space Exec; Exec space; @@ -2778,6 +2782,12 @@ struct ViewValueFunctor { : "Kokkos::View::initialization"), 0, &kpID); } +#endif +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + Kokkos::Impl::cuda_prefetch_pointer(space, ptr, sizeof(ValueType) * n, + true); + } #endif const Kokkos::Impl::ParallelFor closure( *this, PolicyType(0, n)); @@ -2800,7 +2810,7 @@ struct ViewValueFunctor { template struct ViewValueFunctor { - typedef Kokkos::RangePolicy PolicyType; + typedef Kokkos::RangePolicy> PolicyType; ExecSpace space; ValueType* ptr; @@ -2825,6 +2835,12 @@ struct ViewValueFunctor { Kokkos::Profiling::beginParallelFor("Kokkos::View::initialization", 0, &kpID); } +#endif +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same::value) { + Kokkos::Impl::cuda_prefetch_pointer(space, ptr, sizeof(ValueType) * n, + true); + } #endif const Kokkos::Impl::ParallelFor closure( *this, PolicyType(0, n)); @@ -3069,7 +3085,7 @@ class ViewMapping< //---------------------------------------- - KOKKOS_INLINE_FUNCTION ~ViewMapping() {} + KOKKOS_DEFAULTED_FUNCTION ~ViewMapping() = default; KOKKOS_INLINE_FUNCTION ViewMapping() : m_impl_handle(), m_impl_offset() {} KOKKOS_INLINE_FUNCTION ViewMapping(const ViewMapping& rhs) : m_impl_handle(rhs.m_impl_handle), m_impl_offset(rhs.m_impl_offset) {} @@ -3493,7 +3509,7 @@ struct SubViewDataTypeImpl; /* base case */ template -struct SubViewDataTypeImpl > { +struct SubViewDataTypeImpl> { using type = ValueType; }; @@ -3503,16 +3519,17 @@ template ::type>::value>::type, - ValueType, Experimental::Extents, Integral, Args...> - : SubViewDataTypeImpl, - Args...> {}; + ValueType, Kokkos::Experimental::Extents, Integral, Args...> + : SubViewDataTypeImpl, Args...> {}; /* for ALL slice, subview has the same dimension */ template -struct SubViewDataTypeImpl, - ALL_t, Args...> +struct SubViewDataTypeImpl, ALL_t, + Args...> : SubViewDataTypeImpl::type, - Experimental::Extents, Args...> {}; + Kokkos::Experimental::Extents, Args...> {}; /* for pair-style slice, subview has dynamic dimension, since pair doesn't give * static sizes */ @@ -3522,10 +3539,10 @@ template struct SubViewDataTypeImpl< typename std::enable_if::value>::type, ValueType, - Experimental::Extents, PairLike, Args...> + Kokkos::Experimental::Extents, PairLike, Args...> : SubViewDataTypeImpl< void, typename make_all_extents_into_pointers::type*, - Experimental::Extents, Args...> {}; + Kokkos::Experimental::Extents, Args...> {}; template struct SubViewDataType : SubViewDataTypeImpl {}; diff --git a/core/src/impl/Kokkos_ViewTile.hpp b/core/src/impl/Kokkos_ViewTile.hpp index f1227c2ab61..342927ef775 100644 --- a/core/src/impl/Kokkos_ViewTile.hpp +++ b/core/src/impl/Kokkos_ViewTile.hpp @@ -146,22 +146,10 @@ struct ViewOffset< //---------------------------------------- -#ifdef KOKKOS_CUDA_9_DEFAULTED_BUG_WORKAROUND - KOKKOS_INLINE_FUNCTION ~ViewOffset() {} - KOKKOS_INLINE_FUNCTION ViewOffset() {} - KOKKOS_INLINE_FUNCTION ViewOffset(const ViewOffset& rhs) - : m_dim(rhs.m_dim), m_tile_N0(rhs.m_tile_N0) {} - KOKKOS_INLINE_FUNCTION ViewOffset& operator=(const ViewOffset& rhs) { - m_dim = rhs.m_dim; - m_tile_N0 = rhs.m_tile_N0; - return *this; - } -#else - KOKKOS_INLINE_FUNCTION ~ViewOffset() = default; - KOKKOS_INLINE_FUNCTION ViewOffset() = default; - KOKKOS_INLINE_FUNCTION ViewOffset(const ViewOffset&) = default; - KOKKOS_INLINE_FUNCTION ViewOffset& operator=(const ViewOffset&) = default; -#endif + KOKKOS_DEFAULTED_FUNCTION ~ViewOffset() = default; + KOKKOS_DEFAULTED_FUNCTION ViewOffset() = default; + KOKKOS_DEFAULTED_FUNCTION ViewOffset(const ViewOffset&) = default; + KOKKOS_DEFAULTED_FUNCTION ViewOffset& operator=(const ViewOffset&) = default; template KOKKOS_INLINE_FUNCTION constexpr ViewOffset( diff --git a/core/src/kokkos.pc.in b/core/src/kokkos.pc.in deleted file mode 100644 index 47786faefb9..00000000000 --- a/core/src/kokkos.pc.in +++ /dev/null @@ -1,71 +0,0 @@ -# -# Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -# -# Under the terms of Contract DE-NA0003525 with NTESS, -# the U.S. Government retains certain rights in this software. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of the Corporation nor the names of the -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# Questions? Contact Christian R. Trott (crtrott@sandia.gov) -# - -# Add the directory where kokkos.pc got installed to your PKG_CONFIG_PATH - -# Use this on commandline with: -# c++ `pkg-config --cflags --libs kokkos` -o myapp myapp.cpp - -# Use this in a Makefile: -# myapp: myapp.cpp -# $(CC) `pkg-config --cflags --libs kokkos` -o $@ $< - -# Use this in autotools: -# configure.ac: -# PKG_CHECK_MODULES([KOKKOS], [kokkos]) -# Makefile.am: -# myapp_CFLAGS = $(KOKKOS_CFLAGS) -# myapp_LDADD = $(KOKKOS_LIBS) - -# Use this in CMake: -# CMakeLists.txt: -# find_package(PkgConfig) -# pkg_check_modules(KOKKOS IMPORTED_TARGET kokkos) -# target_link_libraries( PkgConfig::KOKKOS) - -libdir=@CMAKE_INSTALL_FULL_LIBDIR@ -includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ - -Name: kokkos -Description: Kokkos C++ Performance Portability Programming EcoSystem -URL: https://github.com/kokkos -Version: @Kokkos_VERSION@ -Requires: -Libs: -L${libdir} -lkokkos @KOKKOS_EXTRA_LIBS_LIST@ @KOKKOS_LINK_FLAGS@ -Libs.private: -lm -Cflags: -I${includedir} @KOKKOS_CXXFLAGS@ diff --git a/core/unit_test/CMakeLists.txt b/core/unit_test/CMakeLists.txt index c0957e83a88..dec2f5e9207 100644 --- a/core/unit_test/CMakeLists.txt +++ b/core/unit_test/CMakeLists.txt @@ -12,8 +12,38 @@ KOKKOS_ADD_TEST_LIBRARY( HEADERS ${GTEST_SOURCE_DIR}/gtest/gtest.h SOURCES ${GTEST_SOURCE_DIR}/gtest/gtest-all.cc ) -KOKKOS_TARGET_COMPILE_DEFINITIONS(kokkos_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0") -KOKKOS_TARGET_INCLUDE_DIRECTORIES(kokkos_gtest PUBLIC ${GTEST_SOURCE_DIR}) +#These can be direct, no need for Tribits or Kokkos wrappers + +# WORKAROUND FOR HIPCC +IF(Kokkos_ENABLE_HIP) + TARGET_COMPILE_DEFINITIONS(kokkos_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0 --amdgpu-target=gfx906") +ELSE() + TARGET_COMPILE_DEFINITIONS(kokkos_gtest PUBLIC "-DGTEST_HAS_PTHREAD=0") +ENDIF() + +TARGET_INCLUDE_DIRECTORIES(kokkos_gtest PUBLIC ${GTEST_SOURCE_DIR}) +#Gtest minimally requires C++11 +TARGET_COMPILE_FEATURES(kokkos_gtest PUBLIC cxx_std_11) + +# +# Define Incremental Testing Feature Levels +# Define Device name mappings (i.e. what comes after Kokkos:: for the ExecSpace) +# + +SET(KOKKOS_CUDA_FEATURE_LEVEL 999) +SET(KOKKOS_CUDA_NAME Cuda) +SET(KOKKOS_HIP_FEATURE_LEVEL 12) +SET(KOKKOS_HIP_NAME Experimental::HIP) +SET(KOKKOS_HPX_FEATURE_LEVEL 999) +SET(KOKKOS_HPX_NAME Experimental::HPX) +SET(KOKKOS_OPENMP_FEATURE_LEVEL 999) +SET(KOKKOS_OPENMP_NAME OpenMP) +SET(KOKKOS_OPENMPTARGET_FEATURE_LEVEL 10) +SET(KOKKOS_OPENMPTARGET_NAME Experimental::OpenMPTarget) +SET(KOKKOS_SERIAL_FEATURE_LEVEL 999) +SET(KOKKOS_SERIAL_NAME Serial) +SET(KOKKOS_THREADS_FEATURE_LEVEL 999) +SET(KOKKOS_THREADS_NAME Threads) # @@ -24,7 +54,7 @@ KOKKOS_TARGET_INCLUDE_DIRECTORIES(kokkos_gtest PUBLIC ${GTEST_SOURCE_DIR}) KOKKOS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) KOKKOS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) -foreach(Tag Threads;Serial;OpenMP;Cuda;HPX) +foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;HIP) # Because there is always an exception to the rule if(Tag STREQUAL "Threads") set(DEVICE "PTHREAD") @@ -46,6 +76,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX) ${dir}/Test${Tag}_AtomicOperations_complexfloat.cpp ${dir}/Test${Tag}_AtomicViews.cpp ${dir}/Test${Tag}_Atomics.cpp + ${dir}/Test${Tag}_Concepts.cpp ${dir}/Test${Tag}_Complex.cpp ${dir}/Test${Tag}_Crs.cpp ${dir}/Test${Tag}_DeepCopyAlignment.cpp @@ -59,6 +90,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX) ${dir}/Test${Tag}_MDRange_e.cpp ${dir}/Test${Tag}_Other.cpp ${dir}/Test${Tag}_RangePolicy.cpp + ${dir}/Test${Tag}_RangePolicyRequire.cpp ${dir}/Test${Tag}_Reductions.cpp ${dir}/Test${Tag}_Reducers_a.cpp ${dir}/Test${Tag}_Reducers_b.cpp @@ -104,6 +136,30 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX) ) endforeach() +if(Kokkos_ENABLE_OPENMPTARGET) + list(REMOVE_ITEM OpenMPTarget_SOURCES + openmptarget/TestOpenMPTarget_AtomicOperations_complexdouble.cpp + openmptarget/TestOpenMPTarget_MDRange_a.cpp + openmptarget/TestOpenMPTarget_MDRange_b.cpp + openmptarget/TestOpenMPTarget_MDRange_c.cpp + openmptarget/TestOpenMPTarget_MDRange_d.cpp + openmptarget/TestOpenMPTarget_MDRange_e.cpp + openmptarget/TestOpenMPTarget_Other.cpp + openmptarget/TestOpenMPTarget_Scan.cpp + openmptarget/TestOpenMPTarget_Team.cpp + openmptarget/TestOpenMPTarget_TeamScratch.cpp + openmptarget/TestOpenMPTarget_ViewAPI_e.cpp + openmptarget/TestOpenMPTarget_ViewMapping_subview.cpp + openmptarget/TestOpenMPTarget_ViewOfClass.cpp + ) +endif() + +if(Kokkos_ENABLE_HIP) + # FIXME Linktime error: undefined reference to + # Kokkos::Impl::ViewDimensin<0ul, ...>(unsigned int, ...) + list(REMOVE_ITEM Serial_SOURCES serial/TestSerial_ViewLayoutStrideAssignment.cpp) +endif() + if(Kokkos_ENABLE_SERIAL) KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial @@ -150,42 +206,11 @@ if(Kokkos_ENABLE_HPX) ) endif() -if(Kokkos_ENABLE_QTHREADS) +if(Kokkos_ENABLE_OPENMPTARGET) KOKKOS_ADD_EXECUTABLE_AND_TEST( - UnitTest_Qthreads + UnitTest_OpenMPTarget SOURCES - UnitTestMainInit.cpp - qthreads/TestQthreads_Atomics.cpp - qthreads/TestQthreads_Complex.cpp - qthreads/TestQthreads_DeepCopyAlignment.cpp - qthreads/TestQthreads_Other.cpp - qthreads/TestQthreads_Reductions.cpp - qthreads/TestQthreads_Reducers_a.cpp - qthreads/TestQthreads_Reducers_b.cpp - qthreads/TestQthreads_Reducers_c.cpp - qthreads/TestQthreads_Reducers_d.cpp - qthreads/TestQthreads_SubView_a.cpp - qthreads/TestQthreads_SubView_b.cpp - qthreads/TestQthreads_SubView_c01.cpp - qthreads/TestQthreads_SubView_c02.cpp - qthreads/TestQthreads_SubView_c03.cpp - qthreads/TestQthreads_SubView_c04.cpp - qthreads/TestQthreads_SubView_c05.cpp - qthreads/TestQthreads_SubView_c06.cpp - qthreads/TestQthreads_SubView_c07.cpp - qthreads/TestQthreads_SubView_c08.cpp - qthreads/TestQthreads_SubView_c09.cpp - qthreads/TestQthreads_SubView_c10.cpp - qthreads/TestQthreads_SubView_c11.cpp - qthreads/TestQthreads_SubView_c12.cpp - qthreads/TestQthreads_SubView_c13.cpp - qthreads/TestQthreads_Team.cpp - qthreads/TestQthreads_View_64bit.cpp - qthreads/TestQthreads_ViewAPI_a.cpp - qthreads/TestQthreads_ViewAPI_b.cpp - qthreads/TestQthreads_ViewAPI_c.cpp - qthreads/TestQthreads_ViewAPI_d.cpp - qthreads/TestQthreads_ViewAPI_e.cpp + ${OpenMPTarget_SOURCES} ) endif() @@ -233,22 +258,66 @@ if(Kokkos_ENABLE_CUDA) ) endif() +if(Kokkos_ENABLE_HIP) + # FIXME_HIP + LIST(REMOVE_ITEM HIP_SOURCES + hip/TestHIP_AtomicOperations_complexdouble.cpp + hip/TestHIP_Other.cpp + hip/TestHIP_Reductions_DeviceView.cpp + hip/TestHIP_Team.cpp + hip/TestHIP_TeamReductionScan.cpp + hip/TestHIP_TeamScratch.cpp + hip/TestHIP_TeamTeamSize.cpp + hip/TestHIP_TeamVectorRange.cpp + hip/TestHIP_UniqueToken.cpp + hip/TestHIP_ViewAPI_a.cpp + hip/TestHIP_ViewAPI_b.cpp + hip/TestHIP_ViewAPI_e.cpp + hip/TestHIP_ViewLayoutStrideAssignment.cpp + hip/TestHIP_WorkGraph.cpp + ) + + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_HIP + SOURCES + ${HIP_SOURCES} + hip/TestHIPHostPinned_ViewAPI_a.cpp + hip/TestHIPHostPinned_ViewAPI_b.cpp + hip/TestHIPHostPinned_ViewAPI_c.cpp + hip/TestHIPHostPinned_ViewAPI_d.cpp + hip/TestHIPHostPinned_ViewAPI_e.cpp + hip/TestHIPHostPinned_ViewCopy.cpp + hip/TestHIPHostPinned_ViewMapping_a.cpp + hip/TestHIPHostPinned_ViewMapping_b.cpp + hip/TestHIPHostPinned_ViewMapping_subview.cpp + ) + KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_HIPInterOpInit + SOURCES + UnitTestMain.cpp + hip/TestHIP_InterOp_Init.cpp + ) +endif() + +SET(DEFAULT_DEVICE_SOURCES + UnitTestMainInit.cpp + default/TestDefaultDeviceType.cpp + default/TestDefaultDeviceType_a1.cpp + default/TestDefaultDeviceType_b1.cpp + default/TestDefaultDeviceType_c1.cpp + default/TestDefaultDeviceType_a2.cpp + default/TestDefaultDeviceType_b2.cpp + default/TestDefaultDeviceType_c2.cpp + default/TestDefaultDeviceType_a3.cpp + default/TestDefaultDeviceType_b3.cpp + default/TestDefaultDeviceType_c3.cpp + default/TestDefaultDeviceType_d.cpp + default/TestDefaultDeviceTypeResize.cpp +) + KOKKOS_ADD_EXECUTABLE_AND_TEST( UnitTest_Default - SOURCES - UnitTestMainInit.cpp - default/TestDefaultDeviceType.cpp - default/TestDefaultDeviceType_a1.cpp - default/TestDefaultDeviceType_b1.cpp - default/TestDefaultDeviceType_c1.cpp - default/TestDefaultDeviceType_a2.cpp - default/TestDefaultDeviceType_b2.cpp - default/TestDefaultDeviceType_c2.cpp - default/TestDefaultDeviceType_a3.cpp - default/TestDefaultDeviceType_b3.cpp - default/TestDefaultDeviceType_c3.cpp - default/TestDefaultDeviceType_d.cpp - default/TestDefaultDeviceTypeResize.cpp + SOURCES ${DEFAULT_DEVICE_SOURCES} ) KOKKOS_ADD_EXECUTABLE_AND_TEST( @@ -275,6 +344,7 @@ KOKKOS_ADD_ADVANCED_TEST( UnitTest_PushFinalizeHook_terminate ALWAYS_FAIL_ON_ZERO_RETURN ) +if(NOT KOKKOS_HAS_TRILINOS) KOKKOS_ADD_TEST_EXECUTABLE( StackTraceTestExec SOURCES @@ -287,20 +357,9 @@ KOKKOS_ADD_TEST_EXECUTABLE( ) # We need -rdynamic on GNU platforms for the stacktrace functionality # to work correctly with shared libraries -if(NOT KOKKOS_HAS_TRILINOS) -SET_PROPERTY(TARGET StackTraceTestExec PROPERTY ENABLE_EXPORTS 1) - -KOKKOS_ADD_TEST( NAME UnitTest_StackTraceTest_normal - EXE StackTraceTestExec - FAIL_REGULAR_EXPRESSION "FAILED" - ) - -KOKKOS_ADD_TEST( NAME UnitTest_StackTraceTest_terminate - EXE StackTraceTestExec - FAIL_REGULAR_EXPRESSION "FAILED" - ) +KOKKOS_SET_EXE_PROPERTY(StackTraceTestExec ENABLE_EXPORTS ON) -KOKKOS_ADD_TEST( NAME UnitTest_StackTraceTest_generic_term +KOKKOS_ADD_TEST( NAME UnitTest_StackTraceTest EXE StackTraceTestExec FAIL_REGULAR_EXPRESSION "FAILED" ) @@ -325,5 +384,52 @@ KOKKOS_ADD_EXECUTABLE_AND_TEST( SOURCES UnitTestMain.cpp TestHostBarrier.cpp ) +FUNCTION (KOKKOS_ADD_INCREMENTAL_TEST DEVICE) + KOKKOS_OPTION( ${DEVICE}_EXCLUDE_TESTS "" STRING "Incremental test exclude list" ) + # Add unit test main + SET(${DEVICE}_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/UnitTestMainInit.cpp) + + # Iterate over incremental tests in directory + APPEND_GLOB(INCREMENTAL_FILE_LIST ${CMAKE_CURRENT_SOURCE_DIR}/incremental/*.hpp) + SET(DEVICE_NAME ${KOKKOS_${DEVICE}_NAME}) + FOREACH (CURRENT_FILE_PATH ${INCREMENTAL_FILE_LIST}) + GET_FILENAME_COMPONENT( CURRENT_FILE_NAME ${CURRENT_FILE_PATH} NAME ) + STRING (REPLACE ".hpp" "" CURRENT_TEST_NAME ${CURRENT_FILE_NAME}) + IF (NOT CURRENT_TEST_NAME IN_LIST Kokkos_${DEVICE}_EXCLUDE_TESTS) + SET (CURRENT_TEST_OUTPUT_FILENAME ${CURRENT_TEST_NAME}_${DEVICE}) + FILE( STRINGS ${CURRENT_FILE_PATH} CURRENT_REQUIRED_FEATURE_LINE REGEX "Kokkos_Feature_Level_Required" ) + # From each test get level implementation required + STRING( REGEX REPLACE ".*Kokkos_Feature_Level_Required:" "" CURRENT_REQUIRED_FEATURE_LEVEL ${CURRENT_REQUIRED_FEATURE_LINE} ) + # Cross-reference list of dependencies with selected feature list > matching feature test files are added to test applications + IF (KOKKOS_${DEVICE}_FEATURE_LEVEL GREATER_EQUAL CURRENT_REQUIRED_FEATURE_LEVEL) + CONFIGURE_FILE (IncrementalTest.cpp.in ${CMAKE_BINARY_DIR}/core/unit_test/generated/${CURRENT_TEST_OUTPUT_FILENAME}.cpp ) + SET(${DEVICE}_SOURCES ${${DEVICE}_SOURCES}; ${CMAKE_BINARY_DIR}/core/unit_test/generated/${CURRENT_TEST_OUTPUT_FILENAME}.cpp) + ENDIF() + ENDIF() + ENDFOREACH() + + STRING(TOUPPER ${DEVICE} UC_DEVICE) + + KOKKOS_OPTION ( + ENABLE_${UC_DEVICE} ON BOOL "ENABLE ${UC_DEVICE}" + ) + + KOKKOS_ADD_EXECUTABLE_AND_TEST( + IncrementalTest_${DEVICE} + SOURCES ${${DEVICE}_SOURCES} + ) + + TARGET_INCLUDE_DIRECTORIES( ${PACKAGE_NAME}_IncrementalTest_${DEVICE} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/incremental ) + +ENDFUNCTION() + +FOREACH (DEVICE ${KOKKOS_ENABLED_DEVICES}) + KOKKOS_ADD_INCREMENTAL_TEST(${DEVICE}) +ENDFOREACH() + +KOKKOS_ADD_EXECUTABLE_AND_TEST( + UnitTest_CTestDevice + SOURCES UnitTestMain.cpp TestCTestDevice.cpp +) diff --git a/core/unit_test/IncrementalTest.cpp.in b/core/unit_test/IncrementalTest.cpp.in new file mode 100644 index 00000000000..e4358efe9dd --- /dev/null +++ b/core/unit_test/IncrementalTest.cpp.in @@ -0,0 +1,58 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_@BACK_END_NAME@_@CURRENT_TEST_NAME@ +#define KOKKOS_TEST_@BACK_END_NAME@_@CURRENT_TEST_NAME@ + +#include +#include + +#define TEST_CATEGORY @DEVICE@ +#define TEST_EXECSPACE Kokkos::@DEVICE_NAME@ + +#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) +#include <@CURRENT_FILE_NAME@> +#endif + +#endif + + diff --git a/core/unit_test/Makefile b/core/unit_test/Makefile index cf945bd718f..1a386adc4aa 100644 --- a/core/unit_test/Makefile +++ b/core/unit_test/Makefile @@ -8,7 +8,7 @@ vpath %.cpp ${KOKKOS_PATH}/core/unit_test/serial vpath %.cpp ${KOKKOS_PATH}/core/unit_test/threads vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmp vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmptarget -vpath %.cpp ${KOKKOS_PATH}/core/unit_test/qthreads +vpath %.cpp ${KOKKOS_PATH}/core/unit_test/hip vpath %.cpp ${KOKKOS_PATH}/core/unit_test/hpx vpath %.cpp ${KOKKOS_PATH}/core/unit_test/cuda vpath %.cpp ${KOKKOS_PATH}/core/unit_test/rocm @@ -67,7 +67,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA = UnitTestMainInit.o gtest-all.o OBJ_CUDA += TestCuda_Init.o OBJ_CUDA += TestCuda_SharedAlloc.o TestCudaUVM_SharedAlloc.o TestCudaHostPinned_SharedAlloc.o - OBJ_CUDA += TestCuda_RangePolicy.o + OBJ_CUDA += TestCuda_RangePolicy.o TestCuda_RangePolicyRequire.o OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o TestCuda_ViewAPI_e.o OBJ_CUDA += TestCuda_DeepCopyAlignment.o OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o TestCuda_ViewResize.o TestCuda_ViewLayoutStrideAssignment.o @@ -103,7 +103,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA += TestCuda_LocalDeepCopy.o OBJ_CUDA += TestCuda_DebugSerialExecution.o OBJ_CUDA += TestCuda_DebugPinUVMSpace.o - + TARGETS += KokkosCore_UnitTest_Cuda TARGETS += KokkosCore_UnitTest_CudaInterOpInit TARGETS += KokkosCore_UnitTest_CudaInterOpStreams @@ -166,7 +166,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) OBJ_THREADS = UnitTestMainInit.o gtest-all.o OBJ_THREADS += TestThreads_Init.o OBJ_THREADS += TestThreads_SharedAlloc.o - OBJ_THREADS += TestThreads_RangePolicy.o + OBJ_THREADS += TestThreads_RangePolicy.o TestThreads_RangePolicyRequire.o OBJ_THREADS += TestThreads_View_64bit.o OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o TestThreads_ViewAPI_c.o TestThreads_ViewAPI_d.o TestThreads_ViewAPI_e.o OBJ_THREADS += TestThreads_DeepCopyAlignment.o @@ -201,7 +201,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) OBJ_OPENMP = UnitTestMainInit.o gtest-all.o OBJ_OPENMP += TestOpenMP_Init.o OBJ_OPENMP += TestOpenMP_SharedAlloc.o - OBJ_OPENMP += TestOpenMP_RangePolicy.o + OBJ_OPENMP += TestOpenMP_RangePolicy.o TestOpenMP_RangePolicyRequire.o OBJ_OPENMP += TestOpenMP_View_64bit.o OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o TestOpenMP_ViewAPI_c.o TestOpenMP_ViewAPI_d.o TestOpenMP_ViewAPI_e.o OBJ_OPENMP += TestOpenMP_DeepCopyAlignment.o @@ -242,11 +242,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) OBJ_OPENMPTARGET += TestOpenMPTarget_Init.o #OBJ_OPENMPTARGET += TestOpenMPTarget_SharedAlloc.o OBJ_OPENMPTARGET += TestOpenMPTarget_RangePolicy.o - OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_a.o TestOpenMPTarget_ViewAPI_b.o TestOpenMPTarget_ViewAPI_c.o TestOpenMPTarget_ViewAPI_d.o TestOpenMPTarget_ViewAPI_e.o #Some commented out code + OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_a.o TestOpenMPTarget_ViewAPI_b.o TestOpenMPTarget_ViewAPI_c.o TestOpenMPTarget_ViewAPI_d.o #Some commented out code + #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_e.o OBJ_OPENMPTARGET += TestOpenMPTarget_DeepCopyAlignment.o OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_a.o OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_b.o - OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_subview.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_subview.o #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewOfClass.o OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_a.o TestOpenMPTarget_SubView_b.o #The following subview tests need something like UVM: @@ -255,12 +256,13 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c07.o TestOpenMPTarget_SubView_c08.o TestOpenMPTarget_SubView_c09.o #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c10.o TestOpenMPTarget_SubView_c11.o TestOpenMPTarget_SubView_c12.o #OBJ_OPENMPTARGET += TestOpenMPTarget_Reductions.o # Need custom reductions - #OBJ_OPENMPTARGET += TestOpenMPTarget_Reducers_a.o TestOpenMPTarget_Reducers_b.o TestOpenMPTarget_Reducers_c.o TestOpenMPTarget_Reducers_d.o + OBJ_OPENMPTARGET += TestOpenMPTarget_Reducers_a.o TestOpenMPTarget_Reducers_b.o TestOpenMPTarget_Reducers_c.o TestOpenMPTarget_Reducers_d.o #OBJ_OPENMPTARGET += TestOpenMPTarget_Scan.o OBJ_OPENMPTARGET += TestOpenMPTarget_Complex.o OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_int.o TestOpenMPTarget_AtomicOperations_unsignedint.o TestOpenMPTarget_AtomicOperations_longint.o OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_unsignedlongint.o TestOpenMPTarget_AtomicOperations_longlongint.o TestOpenMPTarget_AtomicOperations_double.o TestOpenMPTarget_AtomicOperations_float.o - OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_complexfloat.o TestOpenMPTarget_AtomicOperations_complexdouble.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_complexfloat.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_complexdouble.o OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicViews.o OBJ_OPENMPTARGET += TestOpenMPTarget_Atomics.o # Commented Out Arbitrary Type Atomics #OBJ_OPENMPTARGET += TestOpenMPTarget_Team.o # There is still a static function in this @@ -275,28 +277,26 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) TEST_TARGETS += test-openmptarget endif -ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) - OBJ_QTHREADS = TestQthreads_Other.o TestQthreads_Reductions.o TestQthreads_Atomics.o TestQthreads_Team.o - OBJ_QTHREADS += TestQthreads_SubView_a.o TestQthreads_SubView_b.o - OBJ_QTHREADS += TestQthreads_SubView_c01.o TestQthreads_SubView_c02.o TestQthreads_SubView_c03.o - OBJ_QTHREADS += TestQthreads_SubView_c04.o TestQthreads_SubView_c05.o TestQthreads_SubView_c06.o - OBJ_QTHREADS += TestQthreads_SubView_c07.o TestQthreads_SubView_c08.o TestQthreads_SubView_c09.o - OBJ_QTHREADS += TestQthreads_SubView_c10.o TestQthreads_SubView_c11.o TestQthreads_SubView_c12.o - OBJ_QTHREADS += TestQthreads_ViewAPI_a.o TestQthreads_ViewAPI_b.o TestQthreads_ViewAPI_c.o TestQthreads_ViewAPI_d.o TestQthreads_ViewAPI_e.o UnitTestMain.o gtest-all.o - TARGETS += KokkosCore_UnitTest_Qthreads - - OBJ_QTHREADS2 = UnitTestMainInit.o gtest-all.o - OBJ_QTHREADS2 += TestQthreads_Complex.o - TARGETS += KokkosCore_UnitTest_Qthreads2 - - TEST_TARGETS += test-qthreads +ifeq ($(KOKKOS_INTERNAL_USE_HIP), 1) + OBJ_HIP = UnitTestMainInit.o gtest-all.o + OBJ_HIP += TestHIP_Init.o + OBJ_HIP += TestHIP_Reducers_a.o TestHIP_Reducers_b.o TestHIP_Reducers_c.o TestHIP_Reducers_d.o + OBJ_HIP += TestHIP_Reductions.o + OBJ_HIP += TestHIP_MDRange_a.o TestHIP_MDRange_b.o TestHIP_MDRange_c.o TestHIP_MDRange_d.o TestHIP_MDRange_e.o + OBJ_HIP += TestHIP_Spaces.o + OBJ_HIP += TestHIPHostPinned_ViewCopy.o TestHIPHostPinned_ViewAPI_a.o TestHIPHostPinned_ViewAPI_b.o TestHIPHostPinned_ViewAPI_c.o TestHIPHostPinned_ViewAPI_d.o TestHIPHostPinned_ViewAPI_e.o + OBJ_HIP += TestHIPHostPinned_ViewMapping_a.o TestHIPHostPinned_ViewMapping_b.o TestHIPHostPinned_ViewMapping_subview.o + + TARGETS += KokkosCore_UnitTest_HIP + + TEST_TARGETS += test-hip endif ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1) OBJ_HPX = UnitTestMainInit.o gtest-all.o OBJ_HPX += TestHPX_Init.o OBJ_HPX += TestHPX_SharedAlloc.o - OBJ_HPX += TestHPX_RangePolicy.o + OBJ_HPX += TestHPX_RangePolicy.o TestHPX_RangePolicyRequire.o OBJ_HPX += TestHPX_View_64bit.o OBJ_HPX += TestHPX_ViewAPI_a.o TestHPX_ViewAPI_b.o TestHPX_ViewAPI_c.o TestHPX_ViewAPI_d.o TestHPX_ViewAPI_e.o OBJ_HPX += TestHPX_ViewMapping_a.o TestHPX_ViewMapping_b.o TestHPX_ViewMapping_subview.o TestHPX_ViewResize.o @@ -335,7 +335,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) OBJ_SERIAL = UnitTestMainInit.o gtest-all.o OBJ_SERIAL += TestSerial_Init.o OBJ_SERIAL += TestSerial_SharedAlloc.o - OBJ_SERIAL += TestSerial_RangePolicy.o + OBJ_SERIAL += TestSerial_RangePolicy.o TestSerial_RangePolicyRequire.o OBJ_SERIAL += TestSerial_View_64bit.o OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o TestSerial_ViewAPI_c.o TestSerial_ViewAPI_d.o TestSerial_ViewAPI_e.o OBJ_SERIAL += TestSerial_DeepCopyAlignment.o @@ -414,7 +414,7 @@ TEST_TARGETS += ${INITTESTS_TEST_TARGETS} KokkosCore_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Cuda - + KokkosCore_UnitTest_CudaInterOpInit: UnitTestMain.o gtest-all.o TestCuda_InterOp_Init.o $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) UnitTestMain.o gtest-all.o TestCuda_InterOp_Init.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_CudaInterOpInit KokkosCore_UnitTest_CudaInterOpStreams: UnitTestMain.o gtest-all.o TestCuda_InterOp_Streams.o $(KOKKOS_LINK_DEPENDS) @@ -438,11 +438,8 @@ KokkosCore_UnitTest_OpenMPTarget: $(OBJ_OPENMPTARGET) $(KOKKOS_LINK_DEPENDS) KokkosCore_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Serial -KokkosCore_UnitTest_Qthreads: $(OBJ_QTHREADS) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(EXTRA_PATH) $(OBJ_QTHREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Qthreads - -KokkosCore_UnitTest_Qthreads2: $(OBJ_QTHREADS2) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(EXTRA_PATH) $(OBJ_QTHREADS2) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Qthreads2 +KokkosCore_UnitTest_HIP: $(OBJ_HIP) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) $(OBJ_HIP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HIP KokkosCore_UnitTest_HPX: $(OBJ_HPX) $(KOKKOS_LINK_DEPENDS) $(LINK) $(EXTRA_PATH) $(OBJ_HPX) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HPX @@ -496,9 +493,8 @@ test-openmptarget: KokkosCore_UnitTest_OpenMPTarget test-serial: KokkosCore_UnitTest_Serial ./KokkosCore_UnitTest_Serial -test-qthreads: KokkosCore_UnitTest_Qthreads KokkosCore_UnitTest_Qthreads2 - ./KokkosCore_UnitTest_Qthreads - ./KokkosCore_UnitTest_Qthreads2 +test-hip: KokkosCore_UnitTest_HIP + ./KokkosCore_UnitTest_HIP test-hpx: KokkosCore_UnitTest_HPX ./KokkosCore_UnitTest_HPX diff --git a/core/unit_test/TestAtomic.hpp b/core/unit_test/TestAtomic.hpp index a72a179e02e..809f9dc01fd 100644 --- a/core/unit_test/TestAtomic.hpp +++ b/core/unit_test/TestAtomic.hpp @@ -527,7 +527,19 @@ TEST(TEST_CATEGORY, atomics) { ASSERT_TRUE((TestAtomic::Loop(100, 3))); #ifndef KOKKOS_ENABLE_OPENMPTARGET -#ifndef KOKKOS_ENABLE_ROCM // ROCM doesn't yet support atomics for >64bit types + ASSERT_TRUE((TestAtomic::Loop, TEST_EXECSPACE>(1, 1))); + ASSERT_TRUE((TestAtomic::Loop, TEST_EXECSPACE>(1, 2))); + ASSERT_TRUE((TestAtomic::Loop, TEST_EXECSPACE>(1, 3))); + + ASSERT_TRUE( + (TestAtomic::Loop, TEST_EXECSPACE>(100, 1))); + ASSERT_TRUE( + (TestAtomic::Loop, TEST_EXECSPACE>(100, 2))); + ASSERT_TRUE( + (TestAtomic::Loop, TEST_EXECSPACE>(100, 3))); + + // FIXME_HIP HIP doesn't yet support atomics for >64bit types properly +#ifndef KOKKOS_ENABLE_HIP ASSERT_TRUE( (TestAtomic::Loop, TEST_EXECSPACE>(1, 1))); ASSERT_TRUE( @@ -542,17 +554,8 @@ TEST(TEST_CATEGORY, atomics) { ASSERT_TRUE( (TestAtomic::Loop, TEST_EXECSPACE>(100, 3))); - ASSERT_TRUE((TestAtomic::Loop, TEST_EXECSPACE>(1, 1))); - ASSERT_TRUE((TestAtomic::Loop, TEST_EXECSPACE>(1, 2))); - ASSERT_TRUE((TestAtomic::Loop, TEST_EXECSPACE>(1, 3))); - - ASSERT_TRUE( - (TestAtomic::Loop, TEST_EXECSPACE>(100, 1))); - ASSERT_TRUE( - (TestAtomic::Loop, TEST_EXECSPACE>(100, 2))); - ASSERT_TRUE( - (TestAtomic::Loop, TEST_EXECSPACE>(100, 3))); - +// WORKAROUND MSVC +#ifndef _WIN32 ASSERT_TRUE( (TestAtomic::Loop, TEST_EXECSPACE>(100, 1))); ASSERT_TRUE( @@ -561,6 +564,7 @@ TEST(TEST_CATEGORY, atomics) { (TestAtomic::Loop, TEST_EXECSPACE>(100, 3))); #endif #endif +#endif } } // namespace Test diff --git a/core/unit_test/TestAtomicViews.hpp b/core/unit_test/TestAtomicViews.hpp index d7a45dc40fe..109598e8c60 100644 --- a/core/unit_test/TestAtomicViews.hpp +++ b/core/unit_test/TestAtomicViews.hpp @@ -97,8 +97,8 @@ struct TestViewOperator_LeftAndRight { right_view right; stride_view left_stride; stride_view right_stride; - long left_alloc; - long right_alloc; + int64_t left_alloc; + int64_t right_alloc; TestViewOperator_LeftAndRight() : left("left"), @@ -255,11 +255,11 @@ class TestAtomicViewAPI { ASSERT_EQ(ax.use_count(), size_t(4)); ASSERT_EQ(const_ax.use_count(), ax.use_count()); - ASSERT_FALSE(ax.data() == 0); - ASSERT_FALSE(const_ax.data() == 0); // referenceable ptr - ASSERT_FALSE(unmanaged_ax.data() == 0); - ASSERT_FALSE(unmanaged_ax_from_ptr_dx.data() == 0); - ASSERT_FALSE(ay.data() == 0); + ASSERT_FALSE(ax.data() == nullptr); + ASSERT_FALSE(const_ax.data() == nullptr); // referenceable ptr + ASSERT_FALSE(unmanaged_ax.data() == nullptr); + ASSERT_FALSE(unmanaged_ax_from_ptr_dx.data() == nullptr); + ASSERT_FALSE(ay.data() == nullptr); // ASSERT_NE( ax, ay ); // Above test results in following runtime error from gtest: // Expected: (ax) != (ay), actual: 32-byte object <30-01 D0-A0 D8-7F @@ -318,13 +318,13 @@ struct InitFunctor_Seq { typedef Kokkos::View view_type; view_type input; - const long length; + const int64_t length; - InitFunctor_Seq(view_type& input_, const long length_) + InitFunctor_Seq(view_type& input_, const int64_t length_) : input(input_), length(length_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length) { input(i) = (T)i; } @@ -336,15 +336,15 @@ struct InitFunctor_ModTimes { typedef Kokkos::View view_type; view_type input; - const long length; - const long remainder; + const int64_t length; + const int64_t remainder; - InitFunctor_ModTimes(view_type& input_, const long length_, - const long remainder_) + InitFunctor_ModTimes(view_type& input_, const int64_t length_, + const int64_t remainder_) : input(input_), length(length_), remainder(remainder_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length) { if (i % (remainder + 1) == remainder) { input(i) = (T)2; @@ -360,15 +360,15 @@ struct InitFunctor_ModShift { typedef Kokkos::View view_type; view_type input; - const long length; - const long remainder; + const int64_t length; + const int64_t remainder; - InitFunctor_ModShift(view_type& input_, const long length_, - const long remainder_) + InitFunctor_ModShift(view_type& input_, const int64_t length_, + const int64_t remainder_) : input(input_), length(length_), remainder(remainder_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length) { if (i % (remainder + 1) == remainder) { input(i) = 1; @@ -390,15 +390,15 @@ struct PlusEqualAtomicViewFunctor { view_type input; atomic_view_type even_odd_result; - const long length; + const int64_t length; // Wrap the result view in an atomic view, use this for operator PlusEqualAtomicViewFunctor(const view_type& input_, - view_type& even_odd_result_, const long length_) + view_type& even_odd_result_, const int64_t length_) : input(input_), even_odd_result(even_odd_result_), length(length_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length) { if (i % 2 == 0) { even_odd_result(0) += input(i); @@ -410,11 +410,11 @@ struct PlusEqualAtomicViewFunctor { }; template -T PlusEqualAtomicView(const long input_length) { +T PlusEqualAtomicView(const int64_t input_length) { typedef Kokkos::View view_type; typedef typename view_type::HostMirror host_view_type; - const long length = input_length; + const int64_t length = input_length; view_type input("input_view", length); view_type result_view("result_view", 2); @@ -435,19 +435,19 @@ T PlusEqualAtomicView(const long input_length) { } template -T PlusEqualAtomicViewCheck(const long input_length) { - const long N = input_length; +T PlusEqualAtomicViewCheck(const int64_t input_length) { + const int64_t N = input_length; T result[2]; if (N % 2 == 0) { - const long half_sum_end = (N / 2) - 1; - const long full_sum_end = N - 1; + const int64_t half_sum_end = (N / 2) - 1; + const int64_t full_sum_end = N - 1; result[0] = half_sum_end * (half_sum_end + 1) / 2; // Even sum. result[1] = (full_sum_end * (full_sum_end + 1) / 2) - result[0]; // Odd sum. } else { - const long half_sum_end = (T)(N / 2); - const long full_sum_end = N - 2; + const int64_t half_sum_end = (T)(N / 2); + const int64_t full_sum_end = N - 2; result[0] = half_sum_end * (half_sum_end - 1) / 2; // Even sum. result[1] = (full_sum_end * (full_sum_end - 1) / 2) - result[0]; // Odd sum. @@ -457,7 +457,7 @@ T PlusEqualAtomicViewCheck(const long input_length) { } template -bool PlusEqualAtomicViewTest(long input_length) { +bool PlusEqualAtomicViewTest(int64_t input_length) { T res = PlusEqualAtomicView(input_length); T resSerial = PlusEqualAtomicViewCheck(input_length); @@ -487,15 +487,16 @@ struct MinusEqualAtomicViewFunctor { view_type input; atomic_view_type even_odd_result; - const long length; + const int64_t length; // Wrap the result view in an atomic view, use this for operator. MinusEqualAtomicViewFunctor(const view_type& input_, - view_type& even_odd_result_, const long length_) + view_type& even_odd_result_, + const int64_t length_) : input(input_), even_odd_result(even_odd_result_), length(length_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length) { if (i % 2 == 0) { even_odd_result(0) -= input(i); @@ -507,11 +508,11 @@ struct MinusEqualAtomicViewFunctor { }; template -T MinusEqualAtomicView(const long input_length) { +T MinusEqualAtomicView(const int64_t input_length) { typedef Kokkos::View view_type; typedef typename view_type::HostMirror host_view_type; - const long length = input_length; + const int64_t length = input_length; view_type input("input_view", length); view_type result_view("result_view", 2); @@ -532,19 +533,19 @@ T MinusEqualAtomicView(const long input_length) { } template -T MinusEqualAtomicViewCheck(const long input_length) { - const long N = input_length; +T MinusEqualAtomicViewCheck(const int64_t input_length) { + const int64_t N = input_length; T result[2]; if (N % 2 == 0) { - const long half_sum_end = (N / 2) - 1; - const long full_sum_end = N - 1; + const int64_t half_sum_end = (N / 2) - 1; + const int64_t full_sum_end = N - 1; result[0] = -1 * (half_sum_end * (half_sum_end + 1) / 2); // Even sum. result[1] = -1 * ((full_sum_end * (full_sum_end + 1) / 2) + result[0]); // Odd sum. } else { - const long half_sum_end = (long)(N / 2); - const long full_sum_end = N - 2; + const int64_t half_sum_end = (int64_t)(N / 2); + const int64_t full_sum_end = N - 2; result[0] = -1 * (half_sum_end * (half_sum_end - 1) / 2); // Even sum. result[1] = -1 * ((full_sum_end * (full_sum_end - 1) / 2) + result[0]); // Odd sum. @@ -554,7 +555,7 @@ T MinusEqualAtomicViewCheck(const long input_length) { } template -bool MinusEqualAtomicViewTest(long input_length) { +bool MinusEqualAtomicViewTest(int64_t input_length) { T res = MinusEqualAtomicView(input_length); T resSerial = MinusEqualAtomicViewCheck(input_length); @@ -584,15 +585,15 @@ struct TimesEqualAtomicViewFunctor { view_type input; atomic_view_type result; - const long length; + const int64_t length; // Wrap the result view in an atomic view, use this for operator TimesEqualAtomicViewFunctor(const view_type& input_, view_type& result_, - const long length_) + const int64_t length_) : input(input_), result(result_), length(length_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length && i > 0) { result(0) *= (double)input(i); } @@ -600,11 +601,11 @@ struct TimesEqualAtomicViewFunctor { }; template -T TimesEqualAtomicView(const long input_length, const long remainder) { +T TimesEqualAtomicView(const int64_t input_length, const int64_t remainder) { typedef Kokkos::View view_type; typedef typename view_type::HostMirror host_view_type; - const long length = input_length; + const int64_t length = input_length; view_type input("input_view", length); view_type result_view("result_view", 1); @@ -626,12 +627,13 @@ T TimesEqualAtomicView(const long input_length, const long remainder) { } template -T TimesEqualAtomicViewCheck(const long input_length, const long remainder) { +T TimesEqualAtomicViewCheck(const int64_t input_length, + const int64_t remainder) { // Analytical result. - const long N = input_length; - T result = 1.0; + const int64_t N = input_length; + T result = 1.0; - for (long i = 2; i < N; ++i) { + for (int64_t i = 2; i < N; ++i) { if (i % (remainder + 1) == remainder) { result *= 2.0; } else { @@ -643,8 +645,8 @@ T TimesEqualAtomicViewCheck(const long input_length, const long remainder) { } template -bool TimesEqualAtomicViewTest(const long input_length) { - const long remainder = 23; +bool TimesEqualAtomicViewTest(const int64_t input_length) { + const int64_t remainder = 23; T res = TimesEqualAtomicView(input_length, remainder); T resSerial = TimesEqualAtomicViewCheck(input_length, remainder); @@ -675,15 +677,15 @@ struct DivEqualAtomicViewFunctor { view_type input; atomic_view_type result; - const long length; + const int64_t length; // Wrap the result view in an atomic view, use this for operator. DivEqualAtomicViewFunctor(const view_type& input_, scalar_view_type& result_, - const long length_) + const int64_t length_) : input(input_), result(result_), length(length_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length && i > 0) { result() /= (double)(input(i)); } @@ -691,12 +693,12 @@ struct DivEqualAtomicViewFunctor { }; template -T DivEqualAtomicView(const long input_length, const long remainder) { +T DivEqualAtomicView(const int64_t input_length, const int64_t remainder) { typedef Kokkos::View view_type; typedef Kokkos::View scalar_view_type; typedef typename scalar_view_type::HostMirror host_scalar_view_type; - const long length = input_length; + const int64_t length = input_length; view_type input("input_view", length); scalar_view_type result_view("result_view"); @@ -718,10 +720,10 @@ T DivEqualAtomicView(const long input_length, const long remainder) { } template -T DivEqualAtomicViewCheck(const long input_length, const long remainder) { - const long N = input_length; - T result = 12121212121.0; - for (long i = 2; i < N; ++i) { +T DivEqualAtomicViewCheck(const int64_t input_length, const int64_t remainder) { + const int64_t N = input_length; + T result = 12121212121.0; + for (int64_t i = 2; i < N; ++i) { if (i % (remainder + 1) == remainder) { result /= 1.0; } else { @@ -733,8 +735,8 @@ T DivEqualAtomicViewCheck(const long input_length, const long remainder) { } template -bool DivEqualAtomicViewTest(const long input_length) { - const long remainder = 23; +bool DivEqualAtomicViewTest(const int64_t input_length) { + const int64_t remainder = 23; T res = DivEqualAtomicView(input_length, remainder); T resSerial = DivEqualAtomicViewCheck(input_length, remainder); @@ -766,15 +768,15 @@ struct ModEqualAtomicViewFunctor { view_type input; atomic_view_type result; - const long length; + const int64_t length; // Wrap the result view in an atomic view, use this for operator. ModEqualAtomicViewFunctor(const view_type& input_, scalar_view_type& result_, - const long length_) + const int64_t length_) : input(input_), result(result_), length(length_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length && i > 0) { result() %= (double)(input(i)); } @@ -782,12 +784,12 @@ struct ModEqualAtomicViewFunctor { }; template -T ModEqualAtomicView(const long input_length, const long remainder) { +T ModEqualAtomicView(const int64_t input_length, const int64_t remainder) { typedef Kokkos::View view_type; typedef Kokkos::View scalar_view_type; typedef typename scalar_view_type::HostMirror host_scalar_view_type; - const long length = input_length; + const int64_t length = input_length; view_type input("input_view", length); scalar_view_type result_view("result_view"); @@ -809,10 +811,10 @@ T ModEqualAtomicView(const long input_length, const long remainder) { } template -T ModEqualAtomicViewCheck(const long input_length, const long remainder) { - const long N = input_length; - T result = 12121212121; - for (long i = 2; i < N; ++i) { +T ModEqualAtomicViewCheck(const int64_t input_length, const int64_t remainder) { + const int64_t N = input_length; + T result = 12121212121; + for (int64_t i = 2; i < N; ++i) { if (i % (remainder + 1) == remainder) { result %= 1; } else { @@ -824,12 +826,12 @@ T ModEqualAtomicViewCheck(const long input_length, const long remainder) { } template -bool ModEqualAtomicViewTest(const long input_length) { +bool ModEqualAtomicViewTest(const int64_t input_length) { static_assert(std::is_integral::value, "ModEqualAtomicView Error: Type must be integral type for this " "unit test"); - const long remainder = 23; + const int64_t remainder = 23; T res = ModEqualAtomicView(input_length, remainder); T resSerial = ModEqualAtomicViewCheck(input_length, remainder); @@ -861,16 +863,16 @@ struct RSEqualAtomicViewFunctor { const view_type input; atomic_view_type result; - const long length; - const long value; + const int64_t length; + const int64_t value; // Wrap the result view in an atomic view, use this for operator. RSEqualAtomicViewFunctor(const view_type& input_, result_view_type& result_, - const long& length_, const long& value_) + const int64_t& length_, const int64_t& value_) : input(input_), result(result_), length(length_), value(value_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length) { if (i % 4 == 0) { result(1, 0, 0, 0) >>= input(i); @@ -886,13 +888,13 @@ struct RSEqualAtomicViewFunctor { }; template -T RSEqualAtomicView(const long input_length, const long value, - const long remainder) { +T RSEqualAtomicView(const int64_t input_length, const int64_t value, + const int64_t remainder) { typedef Kokkos::View view_type; typedef Kokkos::View result_view_type; typedef typename result_view_type::HostMirror host_scalar_view_type; - const long length = input_length; + const int64_t length = input_length; view_type input("input_view", length); result_view_type result_view("result_view", 2, 2, 2, 2); @@ -918,8 +920,8 @@ T RSEqualAtomicView(const long input_length, const long value, } template -T RSEqualAtomicViewCheck(const long input_length, const long value, - const long remainder) { +T RSEqualAtomicViewCheck(const int64_t input_length, const int64_t value, + const int64_t remainder) { T result[4]; result[0] = value; result[1] = value; @@ -927,7 +929,7 @@ T RSEqualAtomicViewCheck(const long input_length, const long value, result[3] = value; T* input = new T[input_length]; - for (long i = 0; i < input_length; ++i) { + for (int64_t i = 0; i < input_length; ++i) { if (i % (remainder + 1) == remainder) { input[i] = 1; } else { @@ -935,7 +937,7 @@ T RSEqualAtomicViewCheck(const long input_length, const long value, } } - for (long i = 0; i < input_length; ++i) { + for (int64_t i = 0; i < input_length; ++i) { if (i % 4 == 0) { result[0] >>= input[i]; } else if (i % 4 == 1) { @@ -953,12 +955,12 @@ T RSEqualAtomicViewCheck(const long input_length, const long value, } template -bool RSEqualAtomicViewTest(const long input_length) { +bool RSEqualAtomicViewTest(const int64_t input_length) { static_assert(std::is_integral::value, "RSEqualAtomicViewTest: Must be integral type for test"); - const long remainder = 61042; // prime - 1 - const long value = 1073741825; // 2^30+1 + const int64_t remainder = 61042; // prime - 1 + const int64_t value = 1073741825; // 2^30+1 T res = RSEqualAtomicView(input_length, value, remainder); T resSerial = RSEqualAtomicViewCheck(input_length, value, remainder); @@ -989,16 +991,16 @@ struct LSEqualAtomicViewFunctor { view_type input; atomic_view_type result; - const long length; - const long value; + const int64_t length; + const int64_t value; // Wrap the result view in an atomic view, use this for operator. LSEqualAtomicViewFunctor(const view_type& input_, result_view_type& result_, - const long& length_, const long& value_) + const int64_t& length_, const int64_t& value_) : input(input_), result(result_), length(length_), value(value_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length) { if (i % 4 == 0) { result(1, 0, 0, 0) <<= input(i); @@ -1014,13 +1016,13 @@ struct LSEqualAtomicViewFunctor { }; template -T LSEqualAtomicView(const long input_length, const long value, - const long remainder) { +T LSEqualAtomicView(const int64_t input_length, const int64_t value, + const int64_t remainder) { typedef Kokkos::View view_type; typedef Kokkos::View result_view_type; typedef typename result_view_type::HostMirror host_scalar_view_type; - const long length = input_length; + const int64_t length = input_length; view_type input("input_view", length); result_view_type result_view("result_view", 2, 2, 2, 2); @@ -1046,8 +1048,8 @@ T LSEqualAtomicView(const long input_length, const long value, } template -T LSEqualAtomicViewCheck(const long input_length, const long value, - const long remainder) { +T LSEqualAtomicViewCheck(const int64_t input_length, const int64_t value, + const int64_t remainder) { T result[4]; result[0] = value; result[1] = value; @@ -1055,7 +1057,7 @@ T LSEqualAtomicViewCheck(const long input_length, const long value, result[3] = value; T* input = new T[input_length]; - for (long i = 0; i < input_length; ++i) { + for (int64_t i = 0; i < input_length; ++i) { if (i % (remainder + 1) == remainder) { input[i] = 1; } else { @@ -1063,7 +1065,7 @@ T LSEqualAtomicViewCheck(const long input_length, const long value, } } - for (long i = 0; i < input_length; ++i) { + for (int64_t i = 0; i < input_length; ++i) { if (i % 4 == 0) { result[0] <<= input[i]; } else if (i % 4 == 1) { @@ -1081,12 +1083,12 @@ T LSEqualAtomicViewCheck(const long input_length, const long value, } template -bool LSEqualAtomicViewTest(const long input_length) { +bool LSEqualAtomicViewTest(const int64_t input_length) { static_assert(std::is_integral::value, "LSEqualAtomicViewTest: Must be integral type for test"); - const long remainder = 61042; // prime - 1 - const long value = 1; // 2^30+1 + const int64_t remainder = 61042; // prime - 1 + const int64_t value = 1; // 2^30+1 T res = LSEqualAtomicView(input_length, value, remainder); T resSerial = LSEqualAtomicViewCheck(input_length, value, remainder); @@ -1116,15 +1118,15 @@ struct AndEqualAtomicViewFunctor { view_type input; atomic_view_type even_odd_result; - const long length; + const int64_t length; // Wrap the result view in an atomic view, use this for operator. AndEqualAtomicViewFunctor(const view_type& input_, - view_type& even_odd_result_, const long length_) + view_type& even_odd_result_, const int64_t length_) : input(input_), even_odd_result(even_odd_result_), length(length_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length) { if (i % 2 == 0) { even_odd_result(0) &= input(i); @@ -1136,11 +1138,11 @@ struct AndEqualAtomicViewFunctor { }; template -T AndEqualAtomicView(const long input_length) { +T AndEqualAtomicView(const int64_t input_length) { typedef Kokkos::View view_type; typedef typename view_type::HostMirror host_view_type; - const long length = input_length; + const int64_t length = input_length; view_type input("input_view", length); view_type result_view("result_view", 2); @@ -1162,10 +1164,10 @@ T AndEqualAtomicView(const long input_length) { } template -T AndEqualAtomicViewCheck(const long input_length) { - const long N = input_length; - T result[2] = {1}; - for (long i = 0; i < N; ++i) { +T AndEqualAtomicViewCheck(const int64_t input_length) { + const int64_t N = input_length; + T result[2] = {1}; + for (int64_t i = 0; i < N; ++i) { if (N % 2 == 0) { result[0] &= (T)i; } else { @@ -1177,7 +1179,7 @@ T AndEqualAtomicViewCheck(const long input_length) { } template -bool AndEqualAtomicViewTest(long input_length) { +bool AndEqualAtomicViewTest(int64_t input_length) { static_assert(std::is_integral::value, "AndEqualAtomicViewTest: Must be integral type for test"); @@ -1210,15 +1212,15 @@ struct OrEqualAtomicViewFunctor { view_type input; atomic_view_type even_odd_result; - const long length; + const int64_t length; // Wrap the result view in an atomic view, use this for operator. OrEqualAtomicViewFunctor(const view_type& input_, view_type& even_odd_result_, - const long length_) + const int64_t length_) : input(input_), even_odd_result(even_odd_result_), length(length_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length) { if (i % 2 == 0) { even_odd_result(0) |= input(i); @@ -1230,11 +1232,11 @@ struct OrEqualAtomicViewFunctor { }; template -T OrEqualAtomicView(const long input_length) { +T OrEqualAtomicView(const int64_t input_length) { typedef Kokkos::View view_type; typedef typename view_type::HostMirror host_view_type; - const long length = input_length; + const int64_t length = input_length; view_type input("input_view", length); view_type result_view("result_view", 2); @@ -1255,10 +1257,10 @@ T OrEqualAtomicView(const long input_length) { } template -T OrEqualAtomicViewCheck(const long input_length) { - const long N = input_length; - T result[2] = {0}; - for (long i = 0; i < N; ++i) { +T OrEqualAtomicViewCheck(const int64_t input_length) { + const int64_t N = input_length; + T result[2] = {0}; + for (int64_t i = 0; i < N; ++i) { if (i % 2 == 0) { result[0] |= (T)i; } else { @@ -1270,7 +1272,7 @@ T OrEqualAtomicViewCheck(const long input_length) { } template -bool OrEqualAtomicViewTest(long input_length) { +bool OrEqualAtomicViewTest(int64_t input_length) { static_assert(std::is_integral::value, "OrEqualAtomicViewTest: Must be integral type for test"); @@ -1303,15 +1305,15 @@ struct XOrEqualAtomicViewFunctor { view_type input; atomic_view_type even_odd_result; - const long length; + const int64_t length; // Wrap the result view in an atomic view, use this for operator. XOrEqualAtomicViewFunctor(const view_type& input_, - view_type& even_odd_result_, const long length_) + view_type& even_odd_result_, const int64_t length_) : input(input_), even_odd_result(even_odd_result_), length(length_) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()(const int64_t i) const { if (i < length) { if (i % 2 == 0) { even_odd_result(0) ^= input(i); @@ -1323,11 +1325,11 @@ struct XOrEqualAtomicViewFunctor { }; template -T XOrEqualAtomicView(const long input_length) { +T XOrEqualAtomicView(const int64_t input_length) { typedef Kokkos::View view_type; typedef typename view_type::HostMirror host_view_type; - const long length = input_length; + const int64_t length = input_length; view_type input("input_view", length); view_type result_view("result_view", 2); @@ -1348,10 +1350,10 @@ T XOrEqualAtomicView(const long input_length) { } template -T XOrEqualAtomicViewCheck(const long input_length) { - const long N = input_length; - T result[2] = {0}; - for (long i = 0; i < N; ++i) { +T XOrEqualAtomicViewCheck(const int64_t input_length) { + const int64_t N = input_length; + T result[2] = {0}; + for (int64_t i = 0; i < N; ++i) { if (i % 2 == 0) { result[0] ^= (T)i; } else { @@ -1363,7 +1365,7 @@ T XOrEqualAtomicViewCheck(const long input_length) { } template -bool XOrEqualAtomicViewTest(long input_length) { +bool XOrEqualAtomicViewTest(int64_t input_length) { static_assert(std::is_integral::value, "XOrEqualAtomicViewTest: Must be integral type for test"); @@ -1426,38 +1428,38 @@ bool AtomicViewsTestNonIntegralType(const int length, int test) { namespace Test { TEST(TEST_CATEGORY, atomic_views_integral) { - const long length = 1000000; + const int64_t length = 1000000; { // Integral Types. ASSERT_TRUE( - (TestAtomicViews::AtomicViewsTestIntegralType( + (TestAtomicViews::AtomicViewsTestIntegralType( length, 1))); ASSERT_TRUE( - (TestAtomicViews::AtomicViewsTestIntegralType( + (TestAtomicViews::AtomicViewsTestIntegralType( length, 2))); ASSERT_TRUE( - (TestAtomicViews::AtomicViewsTestIntegralType( + (TestAtomicViews::AtomicViewsTestIntegralType( length, 3))); ASSERT_TRUE( - (TestAtomicViews::AtomicViewsTestIntegralType( + (TestAtomicViews::AtomicViewsTestIntegralType( length, 4))); ASSERT_TRUE( - (TestAtomicViews::AtomicViewsTestIntegralType( + (TestAtomicViews::AtomicViewsTestIntegralType( length, 5))); ASSERT_TRUE( - (TestAtomicViews::AtomicViewsTestIntegralType( + (TestAtomicViews::AtomicViewsTestIntegralType( length, 6))); ASSERT_TRUE( - (TestAtomicViews::AtomicViewsTestIntegralType( + (TestAtomicViews::AtomicViewsTestIntegralType( length, 7))); ASSERT_TRUE( - (TestAtomicViews::AtomicViewsTestIntegralType( + (TestAtomicViews::AtomicViewsTestIntegralType( length, 8))); } } TEST(TEST_CATEGORY, atomic_views_nonintegral) { - const long length = 1000000; + const int64_t length = 1000000; { // Non-Integral Types. ASSERT_TRUE(( diff --git a/core/unit_test/TestCTestDevice.cpp b/core/unit_test/TestCTestDevice.cpp new file mode 100644 index 00000000000..b2ee79b856b --- /dev/null +++ b/core/unit_test/TestCTestDevice.cpp @@ -0,0 +1,138 @@ +#include + +namespace Kokkos { +namespace Impl { + +int get_ctest_gpu(const char *local_rank_str); + +} // namespace Impl +} // namespace Kokkos + +#ifdef _WIN32 +int setenv(const char *name, const char *value, int overwrite) { + int errcode = 0; + if (!overwrite) { + size_t envsize = 0; + errcode = getenv_s(&envsize, NULL, 0, name); + if (errcode || envsize) return errcode; + } + return _putenv_s(name, value); +} + +int unsetenv(const char *name) { return _putenv_s(name, ""); } +#endif + +// Needed because https://github.com/google/googletest/issues/952 has not been +// resolved +#define EXPECT_THROW_WITH_MESSAGE(stmt, etype, whatstring) \ + EXPECT_THROW( \ + try { stmt; } catch (const etype &ex) { \ + EXPECT_EQ(whatstring, std::string(ex.what())); \ + throw; \ + }, \ + etype) + +class ctest_environment : public ::testing::Test { + protected: + void SetUp(); +}; + +void ctest_environment::SetUp() { + setenv("CTEST_KOKKOS_DEVICE_TYPE", "gpus", 1); + setenv("CTEST_RESOURCE_GROUP_COUNT", "10", 1); + unsetenv("CTEST_RESOURCE_GROUP_0"); + setenv("CTEST_RESOURCE_GROUP_1", "threads", 1); + setenv("CTEST_RESOURCE_GROUP_2", "threads,cores", 1); + + setenv("CTEST_RESOURCE_GROUP_3", "gpus", 1); + unsetenv("CTEST_RESOURCE_GROUP_3_GPUS"); + + setenv("CTEST_RESOURCE_GROUP_4", "gpus", 1); + setenv("CTEST_RESOURCE_GROUP_4_GPUS", "id:2", 1); + + setenv("CTEST_RESOURCE_GROUP_5", "gpus", 1); + setenv("CTEST_RESOURCE_GROUP_5_GPUS", "slots:1,id:2", 1); + + setenv("CTEST_RESOURCE_GROUP_6", "gpus", 1); + setenv("CTEST_RESOURCE_GROUP_6_GPUS", "id:2,slots:1", 1); + + setenv("CTEST_RESOURCE_GROUP_7", "threads,gpus", 1); + setenv("CTEST_RESOURCE_GROUP_7_GPUS", "id:3,slots:1", 1); + + setenv("CTEST_RESOURCE_GROUP_8", "gpus,threads", 1); + setenv("CTEST_RESOURCE_GROUP_8_GPUS", "id:1,slots:1", 1); + + setenv("CTEST_RESOURCE_GROUP_9", "cores,gpus,threads", 1); + setenv("CTEST_RESOURCE_GROUP_9_GPUS", "id:4,slots:1", 1); +} + +TEST_F(ctest_environment, no_device_type) { + unsetenv("CTEST_KOKKOS_DEVICE_TYPE"); + EXPECT_EQ(Kokkos::Impl::get_ctest_gpu("0"), 0); +} + +TEST_F(ctest_environment, no_process_count) { + unsetenv("CTEST_RESOURCE_GROUP_COUNT"); + EXPECT_EQ(Kokkos::Impl::get_ctest_gpu("0"), 0); +} + +TEST_F(ctest_environment, invalid_rank) { + EXPECT_THROW_WITH_MESSAGE( + Kokkos::Impl::get_ctest_gpu("10"), std::runtime_error, + "Error: local rank 10 is outside the bounds of resource groups provided " + "by" + " CTest. Raised by Kokkos::Impl::get_ctest_gpu().\nTraceback " + "functionality" + " not available\n"); +} + +TEST_F(ctest_environment, no_type_str) { + EXPECT_THROW_WITH_MESSAGE( + Kokkos::Impl::get_ctest_gpu("0"), std::runtime_error, + "Error: CTEST_RESOURCE_GROUP_0 is not specified. Raised by " + "Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not " + "available\n"); +} + +TEST_F(ctest_environment, missing_type) { + EXPECT_THROW_WITH_MESSAGE( + Kokkos::Impl::get_ctest_gpu("1"), std::runtime_error, + "Error: device type 'gpus' not included in CTEST_RESOURCE_GROUP_1. " + "Raised " + "by Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not available" + "\n"); + EXPECT_THROW_WITH_MESSAGE( + Kokkos::Impl::get_ctest_gpu("2"), std::runtime_error, + "Error: device type 'gpus' not included in CTEST_RESOURCE_GROUP_2. " + "Raised " + "by Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not available" + "\n"); +} + +TEST_F(ctest_environment, no_id_str) { + EXPECT_THROW_WITH_MESSAGE( + Kokkos::Impl::get_ctest_gpu("3"), std::runtime_error, + "Error: CTEST_RESOURCE_GROUP_3_GPUS is not specified. Raised by " + "Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not " + "available\n"); +} + +TEST_F(ctest_environment, invalid_id_str) { + EXPECT_THROW_WITH_MESSAGE( + Kokkos::Impl::get_ctest_gpu("4"), std::runtime_error, + "Error: invalid value of CTEST_RESOURCE_GROUP_4_GPUS: 'id:2'. Raised by " + "Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not " + "available\n"); + EXPECT_THROW_WITH_MESSAGE( + Kokkos::Impl::get_ctest_gpu("5"), std::runtime_error, + "Error: invalid value of CTEST_RESOURCE_GROUP_5_GPUS: 'slots:1,id:2'. " + "Raised by Kokkos::Impl::get_ctest_gpu().\nTraceback functionality not " + "available\n"); +} + +TEST_F(ctest_environment, good) { + EXPECT_EQ(Kokkos::Impl::get_ctest_gpu("6"), 2); + EXPECT_EQ(Kokkos::Impl::get_ctest_gpu("7"), 3); + EXPECT_EQ(Kokkos::Impl::get_ctest_gpu("8"), 1); + EXPECT_EQ(Kokkos::Impl::get_ctest_gpu("9"), 4); +} diff --git a/core/unit_test/TestCXX11.hpp b/core/unit_test/TestCXX11.hpp index 876a3b5da80..405652b29e6 100644 --- a/core/unit_test/TestCXX11.hpp +++ b/core/unit_test/TestCXX11.hpp @@ -361,6 +361,7 @@ bool Test(int test) { return passed; #else + (void)test; return true; #endif } diff --git a/core/unit_test/TestCompilerMacros.hpp b/core/unit_test/TestCompilerMacros.hpp index e8daab9db60..c644daca0e2 100644 --- a/core/unit_test/TestCompilerMacros.hpp +++ b/core/unit_test/TestCompilerMacros.hpp @@ -44,10 +44,7 @@ #include -#if defined(KOKKOS_ENABLE_CUDA) && \ - (!defined(KOKKOS_ENABLE_CUDA_LAMBDA) || \ - ((defined(KOKKOS_ENABLE_SERIAL) || defined(KOKKOS_ENABLE_OPENMP)) && \ - ((CUDA_VERSION < 8000) && defined(__NVCC__)))) +#if defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_CUDA_LAMBDA) #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) #error "Macro bug: KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA shouldn't be defined" #endif diff --git a/core/unit_test/TestComplex.hpp b/core/unit_test/TestComplex.hpp index e049dcd1756..2bb81052fea 100644 --- a/core/unit_test/TestComplex.hpp +++ b/core/unit_test/TestComplex.hpp @@ -84,8 +84,9 @@ struct TestComplexConstruction { ASSERT_FLOAT_EQ(h_results(8).real(), double(8)); ASSERT_FLOAT_EQ(h_results(8).imag(), 0.0); -#ifndef KOKKOS_ENABLE_ROCM // Copy construction conversion between - // Kokkos::complex and std::complex doesn't compile + // Copy construction conversion between + // Kokkos::complex and std::complex doesn't compile +#ifndef KOKKOS_ENABLE_HIP // FIXME_HIP Kokkos::complex a(1.5, 2.5), b(3.25, 5.25), r_kk; std::complex sa(a), sb(3.25, 5.25), r; r = a; @@ -104,7 +105,7 @@ struct TestComplexConstruction { } KOKKOS_INLINE_FUNCTION - void operator()(const int &i) const { + void operator()(const int & /*i*/) const { Kokkos::complex a(1.5, 2.5); d_results(0) = a; Kokkos::complex b(a); @@ -164,8 +165,10 @@ struct TestComplexBasicMath { ASSERT_FLOAT_EQ(h_results(2).real(), r.real()); ASSERT_FLOAT_EQ(h_results(2).imag(), r.imag()); r = a / b; +#ifndef KOKKOS_WORKAROUND_OPENMPTARGET_CLANG ASSERT_FLOAT_EQ(h_results(3).real(), r.real()); ASSERT_FLOAT_EQ(h_results(3).imag(), r.imag()); +#endif r = d + a; ASSERT_FLOAT_EQ(h_results(4).real(), r.real()); ASSERT_FLOAT_EQ(h_results(4).imag(), r.imag()); @@ -212,8 +215,10 @@ struct TestComplexBasicMath { ASSERT_FLOAT_EQ(h_results(18).real(), r.real()); ASSERT_FLOAT_EQ(h_results(18).imag(), r.imag()); r = c / a; +#ifndef KOKKOS_WORKAROUND_OPENMPTARGET_CLANG ASSERT_FLOAT_EQ(h_results(19).real(), r.real()); ASSERT_FLOAT_EQ(h_results(19).imag(), r.imag()); +#endif r = a; /* r = a+e; */ ASSERT_FLOAT_EQ(h_results(20).real(), r.real() + e); @@ -227,7 +232,7 @@ struct TestComplexBasicMath { } KOKKOS_INLINE_FUNCTION - void operator()(const int &i) const { + void operator()(const int & /*i*/) const { Kokkos::complex a(1.5, 2.5); Kokkos::complex b(3.25, 5.75); // Basic math complex / complex @@ -320,7 +325,7 @@ struct TestComplexSpecialFunctions { } KOKKOS_INLINE_FUNCTION - void operator()(const int &i) const { + void operator()(const int & /*i*/) const { Kokkos::complex a(1.5, 2.5); Kokkos::complex b(3.25, 5.75); double c = 9.3; @@ -356,4 +361,41 @@ TEST(TEST_CATEGORY, complex_special_funtions) { TEST(TEST_CATEGORY, complex_io) { testComplexIO(); } +TEST(TEST_CATEGORY, complex_trivially_copyable) { + using RealType = double; + + // Kokkos::complex is trivially copyable when RealType is + // trivially copyable + // Simply disable the check for IBM's XL compiler since we can't reliably + // check for a version that defines relevant functions. +#if !defined(__ibmxl__) + // clang claims compatibility with gcc 4.2.1 but all versions tested know + // about std::is_trivially_copyable. +#if !defined(__clang__) +#define KOKKOS_COMPILER_GNU_VERSION \ + __GNUC__ * 100 + __GNUC_MINOR__ * 10 + __GNUC_PATCHLEVEL__ +#endif +#if KOKKOS_COMPILER_GNU_VERSION == 0 || KOKKOS_COMPILER_GNU_VERSION > 500 + ASSERT_TRUE(std::is_trivially_copyable>::value || + !std::is_trivially_copyable::value); +#elif KOKKOS_COMPILER_GNU_VERSION > 480 + ASSERT_TRUE( + (std::has_trivial_copy_constructor>::value && + std::has_trivial_copy_assign>::value && + std::is_trivially_destructible>::value) || + !(std::has_trivial_copy_constructor::value && + std::has_trivial_copy_assign::value && + std::is_trivially_destructible::value)); +#else + ASSERT_TRUE( + (std::has_trivial_copy_constructor>::value && + std::has_trivial_copy_assign>::value && + std::has_trivial_destructor>::value) || + !(std::has_trivial_copy_constructor::value && + std::has_trivial_copy_assign::value && + std::has_trivial_destructor::value)); +#endif +#endif +} + } // namespace Test diff --git a/core/unit_test/TestConcepts.hpp b/core/unit_test/TestConcepts.hpp new file mode 100644 index 00000000000..dcd4d948a65 --- /dev/null +++ b/core/unit_test/TestConcepts.hpp @@ -0,0 +1,81 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +namespace TestConcept { + +using ExecutionSpace = TEST_EXECSPACE; +using MemorySpace = typename ExecutionSpace::memory_space; +using DeviceType = typename ExecutionSpace::device_type; + +static_assert(Kokkos::is_execution_space{}, ""); +static_assert(Kokkos::is_execution_space{}, ""); +static_assert(!Kokkos::is_execution_space{}, ""); +static_assert(!Kokkos::is_execution_space{}, ""); + +static_assert(Kokkos::is_memory_space{}, ""); +static_assert(Kokkos::is_memory_space{}, ""); +static_assert(!Kokkos::is_memory_space{}, ""); +static_assert(!Kokkos::is_memory_space{}, ""); + +static_assert(Kokkos::is_device{}, ""); +static_assert(Kokkos::is_device{}, ""); +static_assert(!Kokkos::is_device{}, ""); +static_assert(!Kokkos::is_device{}, ""); + +static_assert(!Kokkos::is_device{}, ""); +static_assert(!Kokkos::is_device{}, ""); + +static_assert(Kokkos::is_space{}, ""); +static_assert(Kokkos::is_space{}, ""); +static_assert(Kokkos::is_space{}, ""); +static_assert(Kokkos::is_space{}, ""); +static_assert(Kokkos::is_space{}, ""); +static_assert(Kokkos::is_space{}, ""); +static_assert(!Kokkos::is_space{}, ""); +static_assert(!Kokkos::is_space{}, ""); +static_assert(!Kokkos::is_space{}, ""); + +} // namespace TestConcept diff --git a/core/unit_test/TestCrs.hpp b/core/unit_test/TestCrs.hpp index 58071096019..296235aad01 100644 --- a/core/unit_test/TestCrs.hpp +++ b/core/unit_test/TestCrs.hpp @@ -53,7 +53,7 @@ namespace { template struct CountFillFunctor { KOKKOS_INLINE_FUNCTION - std::int32_t operator()(std::int32_t row, std::int32_t *fill) const { + std::int32_t operator()(std::int32_t row, float *fill) const { auto n = (row % 4) + 1; if (fill) { for (std::int32_t j = 0; j < n; ++j) { @@ -153,7 +153,7 @@ struct RunUpdateCrsTest { template void test_count_fill(std::int32_t nrows) { - Kokkos::Crs graph; + Kokkos::Crs graph; Kokkos::count_and_fill_crs(graph, nrows, CountFillFunctor()); ASSERT_EQ(graph.numRows(), nrows); auto row_map = Kokkos::create_mirror_view(graph.row_map); @@ -176,12 +176,12 @@ void test_count_fill(std::int32_t nrows) { template void test_constructor(std::int32_t nrows) { for (int nTest = 1; nTest < 5; nTest++) { - typedef Kokkos::Crs crs_int32; - crs_int32 graph; + typedef Kokkos::Crs crs_type; + crs_type graph; Kokkos::count_and_fill_crs(graph, nrows, CountFillFunctor()); ASSERT_EQ(graph.numRows(), nrows); - RunUpdateCrsTest crstest(graph); + RunUpdateCrsTest crstest(graph); crstest.run_test(nTest); auto row_map = Kokkos::create_mirror_view(graph.row_map); diff --git a/core/unit_test/TestDeepCopy.hpp b/core/unit_test/TestDeepCopy.hpp index a7e80cef366..56c259ff6a3 100644 --- a/core/unit_test/TestDeepCopy.hpp +++ b/core/unit_test/TestDeepCopy.hpp @@ -2,6 +2,7 @@ namespace Test { +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA namespace Impl { template struct TestDeepCopy { @@ -210,5 +211,149 @@ TEST(TEST_CATEGORY, deep_copy_alignment) { Kokkos::HostSpace>::run_test(100000); } } +#endif +// KOKKOS_IMPL_HIP_CLANG_WORKAROUND +#ifndef KOKKOS_ENABLE_HIP +namespace Impl { +template +struct TestDeepCopyScalarConversion { + struct TagFill {}; + struct TagCompare {}; + + using view_type_s1_1d = Kokkos::View; + using view_type_s2_1d = Kokkos::View; + using view_type_s1_2d = Kokkos::View; + using view_type_s2_2d = Kokkos::View; + + using base_layout1 = typename std::conditional< + std::is_same::value, Kokkos::LayoutLeft, + Layout1>::type; + using base_layout2 = typename std::conditional< + std::is_same::value, Kokkos::LayoutLeft, + Layout2>::type; + + using base_type_s1_1d = Kokkos::View; + using base_type_s2_1d = Kokkos::View; + using base_type_s1_2d = Kokkos::View; + using base_type_s2_2d = Kokkos::View; + + view_type_s1_1d view_s1_1d; + view_type_s2_1d view_s2_1d; + view_type_s1_2d view_s1_2d; + view_type_s2_2d view_s2_2d; + + Kokkos::View error_count; + + void create_views(int64_t N0, int64_t N1) { + base_type_s1_1d b_s1_1d("TestDeepCopyConversion::b_s1_1d", N0); + base_type_s2_1d b_s2_1d("TestDeepCopyConversion::b_s2_1d", N0); + base_type_s1_2d b_s1_2d("TestDeepCopyConversion::b_s1_2d", N0, N1); + base_type_s2_2d b_s2_2d("TestDeepCopyConversion::b_s2_2d", N0, N1); + + view_s1_1d = view_type_s1_1d(b_s1_1d, Kokkos::ALL); + view_s2_1d = view_type_s2_1d(b_s2_1d, Kokkos::ALL); + view_s1_2d = view_type_s1_2d(b_s1_2d, Kokkos::ALL, Kokkos::ALL); + view_s2_2d = view_type_s2_2d(b_s2_2d, Kokkos::ALL, Kokkos::ALL); + + error_count = Kokkos::View( + "TestDeepCopyConversion::error_count"); + } + + KOKKOS_FUNCTION + void operator()(TagFill, const int64_t i) const { + view_s2_1d(i) = static_cast(i + 1); + for (int64_t j = 0; j < static_cast(view_s2_2d.extent(1)); j++) + view_s2_2d(i, j) = static_cast((i + 1) * 1000 + j + 1); + } + + KOKKOS_FUNCTION + void operator()(TagCompare, const int64_t i) const { + int64_t errors = 0; + if (view_s1_1d(i) != static_cast(static_cast(i + 1))) + errors++; + for (int64_t j = 0; j < static_cast(view_s1_2d.extent(1)); j++) { + if (view_s1_2d(i, j) != + static_cast(static_cast((i + 1) * 1000 + j + 1))) + errors++; + } + if (errors > 0) Kokkos::atomic_add(&error_count(), errors); + } + + void run_tests(int64_t N0, int64_t N1) { + create_views(N0, N1); + + Kokkos::parallel_for("TestDeepCopyConversion::Fill", + Kokkos::RangePolicy>(0, N0), + *this); + + Kokkos::deep_copy(view_s1_1d, view_s2_1d); + Kokkos::deep_copy(view_s1_2d, view_s2_2d); + + Kokkos::parallel_for("TestDeepCopyConversion::Compare", + Kokkos::RangePolicy>(0, N0), + *this); + + int64_t errors = 0; + Kokkos::deep_copy(errors, error_count); + ASSERT_TRUE(errors == 0); + + Kokkos::deep_copy(view_s1_1d, static_cast(0)); + Kokkos::deep_copy(view_s1_2d, static_cast(0)); + + Kokkos::parallel_for("TestDeepCopyConversion::Compare", + Kokkos::RangePolicy>(0, N0), + *this); + Kokkos::deep_copy(errors, error_count); + ASSERT_TRUE(errors > 0); + + Kokkos::deep_copy(error_count, 0); + Kokkos::deep_copy(TEST_EXECSPACE(), view_s1_1d, view_s2_1d); + Kokkos::deep_copy(TEST_EXECSPACE(), view_s1_2d, view_s2_2d); + + Kokkos::parallel_for("TestDeepCopyConversion::Compare", + Kokkos::RangePolicy>(0, N0), + *this); + + Kokkos::deep_copy(errors, error_count); + ASSERT_TRUE(errors == 0); + } +}; +} // namespace Impl + +TEST(TEST_CATEGORY, deep_copy_conversion) { + int64_t N0 = 19381; + int64_t N1 = 17; + + using right = Kokkos::LayoutRight; + using left = Kokkos::LayoutLeft; + using stride = Kokkos::LayoutStride; + + Impl::TestDeepCopyScalarConversion().run_tests( + N0, N1); + Impl::TestDeepCopyScalarConversion().run_tests( + N0, N1); + Impl::TestDeepCopyScalarConversion().run_tests( + N0, N1); + Impl::TestDeepCopyScalarConversion().run_tests( + N0, N1); + Impl::TestDeepCopyScalarConversion().run_tests( + N0, N1); + + Impl::TestDeepCopyScalarConversion().run_tests( + N0, N1); + Impl::TestDeepCopyScalarConversion().run_tests( + N0, N1); + Impl::TestDeepCopyScalarConversion().run_tests( + N0, N1); + Impl::TestDeepCopyScalarConversion().run_tests( + N0, N1); + Impl::TestDeepCopyScalarConversion().run_tests( + N0, N1); +} +#endif } // namespace Test diff --git a/core/unit_test/TestLocalDeepCopy.hpp b/core/unit_test/TestLocalDeepCopy.hpp index 9bca373a407..c776481c706 100644 --- a/core/unit_test/TestLocalDeepCopy.hpp +++ b/core/unit_test/TestLocalDeepCopy.hpp @@ -934,7 +934,6 @@ void impl_test_local_deepcopy_rangepolicy_rank_7(const int N) { //------------------------------------------------------------------------------------------------------------- #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) TEST(TEST_CATEGORY, local_deepcopy_teampolicy_layoutleft) { typedef TEST_EXECSPACE ExecSpace; typedef Kokkos::View ViewType; @@ -1043,5 +1042,73 @@ TEST(TEST_CATEGORY, local_deepcopy_rangepolicy_layoutright) { } } #endif -#endif + +namespace Impl { +template +using ShMemView = + Kokkos::View; + +struct DeepCopyScratchFunctor { + DeepCopyScratchFunctor( + Kokkos::View check_view_1, + Kokkos::View check_view_2) + : check_view_1_(check_view_1), + check_view_2_(check_view_2), + N_(check_view_1.extent(0)) {} + + KOKKOS_INLINE_FUNCTION void operator()( + Kokkos::TeamPolicy>::member_type team) + const { + using ShmemType = TEST_EXECSPACE::scratch_memory_space; + auto shview = + Impl::ShMemView(team.team_scratch(1), N_, 1); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, N_), KOKKOS_LAMBDA(const size_t& index) { + auto thread_shview = Kokkos::subview(shview, index, Kokkos::ALL()); + Kokkos::Experimental::local_deep_copy(thread_shview, index); + }); + Kokkos::Experimental::local_deep_copy( + team, check_view_1_, Kokkos::subview(shview, Kokkos::ALL(), 0)); + + Kokkos::Experimental::local_deep_copy(team, shview, 6.); + Kokkos::Experimental::local_deep_copy( + team, check_view_2_, Kokkos::subview(shview, Kokkos::ALL(), 0)); + } + + Kokkos::View check_view_1_; + Kokkos::View check_view_2_; + int const N_; +}; +} // namespace Impl + +TEST(TEST_CATEGORY, deep_copy_scratch) { + using TestDeviceTeamPolicy = Kokkos::TeamPolicy; + + const int N = 8; + const int bytes_per_team = + Impl::ShMemView::shmem_size(N, 1); + + TestDeviceTeamPolicy policy(1, Kokkos::AUTO); + auto team_exec = policy.set_scratch_size(1, Kokkos::PerTeam(bytes_per_team)); + + Kokkos::View check_view_1("check_1", + N); + Kokkos::View check_view_2("check_2", + N); + + Kokkos::parallel_for( + team_exec, Impl::DeepCopyScratchFunctor{check_view_1, check_view_2}); + auto host_copy_1 = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), check_view_1); + auto host_copy_2 = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), check_view_2); + + for (unsigned int i = 0; i < N; ++i) { + ASSERT_EQ(host_copy_1(i), i); + ASSERT_EQ(host_copy_2(i), 6.0); + } +} } // namespace Test diff --git a/core/unit_test/TestMDRange.hpp b/core/unit_test/TestMDRange.hpp index 1e1bd869ca0..c4288f21a11 100644 --- a/core/unit_test/TestMDRange.hpp +++ b/core/unit_test/TestMDRange.hpp @@ -256,7 +256,6 @@ struct TestMDRange_2D { static void test_reduce2(const int N0, const int N1) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -269,13 +268,12 @@ struct TestMDRange_2D { double sum = 0.0; parallel_reduce( range, - KOKKOS_LAMBDA(const int i, const int j, double &lsum) { + KOKKOS_LAMBDA(const int /*i*/, const int /*j*/, double &lsum) { lsum += 1.0; }, sum); ASSERT_EQ(sum, N0 * N1); } -#endif #endif { @@ -361,7 +359,6 @@ struct TestMDRange_2D { } // Test Min reducer with lambda #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -387,7 +384,6 @@ struct TestMDRange_2D { ASSERT_EQ(min, 4.0); } -#endif #endif // Tagged operator test { @@ -532,7 +528,6 @@ struct TestMDRange_2D { static void test_for2(const int N0, const int N1) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -571,7 +566,6 @@ struct TestMDRange_2D { ASSERT_EQ(counter, 0); } -#endif #endif { @@ -909,7 +903,6 @@ struct TestMDRange_3D { static void test_reduce3(const int N0, const int N1, const int N2) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -922,13 +915,11 @@ struct TestMDRange_3D { double sum = 0.0; parallel_reduce( range, - KOKKOS_LAMBDA(const int i, const int j, const int k, double &lsum) { - lsum += 1.0; - }, + KOKKOS_LAMBDA(const int /*i*/, const int /*j*/, const int /*k*/, + double &lsum) { lsum += 1.0; }, sum); ASSERT_EQ(sum, N0 * N1 * N2); } -#endif #endif { @@ -1013,7 +1004,6 @@ struct TestMDRange_3D { } // Test Min reducer with lambda #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -1046,7 +1036,6 @@ struct TestMDRange_3D { ASSERT_EQ(min, min_identity); } } -#endif #endif // Tagged operator test @@ -1193,7 +1182,6 @@ struct TestMDRange_3D { static void test_for3(const int N0, const int N1, const int N2) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -1236,7 +1224,6 @@ struct TestMDRange_3D { ASSERT_EQ(counter, 0); } -#endif #endif { @@ -1549,7 +1536,6 @@ struct TestMDRange_4D { static void test_reduce4(const int N0, const int N1, const int N2, const int N3) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -1562,12 +1548,11 @@ struct TestMDRange_4D { double sum = 0.0; parallel_reduce( range, - KOKKOS_LAMBDA(const int i, const int j, const int k, const int l, - double &lsum) { lsum += 1.0; }, + KOKKOS_LAMBDA(const int /*i*/, const int /*j*/, const int /*k*/, + const int /*l*/, double &lsum) { lsum += 1.0; }, sum); ASSERT_EQ(sum, N0 * N1 * N2 * N3); } -#endif #endif { @@ -1656,7 +1641,6 @@ struct TestMDRange_4D { // Test Min reducer with lambda #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -1684,7 +1668,6 @@ struct TestMDRange_4D { ASSERT_EQ(min, 16.0); } -#endif #endif // Tagged operator test @@ -1834,7 +1817,6 @@ struct TestMDRange_4D { static void test_for4(const int N0, const int N1, const int N2, const int N3) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -1878,7 +1860,6 @@ struct TestMDRange_4D { ASSERT_EQ(counter, 0); } -#endif #endif { @@ -2206,7 +2187,6 @@ struct TestMDRange_5D { static void test_reduce5(const int N0, const int N1, const int N2, const int N3, const int N4) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -2220,12 +2200,12 @@ struct TestMDRange_5D { double sum = 0.0; parallel_reduce( range, - KOKKOS_LAMBDA(const int i, const int j, const int k, const int l, - const int m, double &lsum) { lsum += 1.0; }, + KOKKOS_LAMBDA(const int /*i*/, const int /*j*/, const int /*k*/, + const int /*l*/, const int /*m*/, + double &lsum) { lsum += 1.0; }, sum); ASSERT_EQ(sum, N0 * N1 * N2 * N3 * N4); } -#endif #endif { @@ -2320,7 +2300,6 @@ struct TestMDRange_5D { // Test Min reducer with lambda #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -2352,7 +2331,6 @@ struct TestMDRange_5D { ASSERT_EQ(min, 32.0); } -#endif #endif // Tagged operator test @@ -2404,7 +2382,6 @@ struct TestMDRange_5D { static void test_for5(const int N0, const int N1, const int N2, const int N3, const int N4) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -2452,7 +2429,6 @@ struct TestMDRange_5D { ASSERT_EQ(counter, 0); } -#endif #endif { @@ -2798,7 +2774,6 @@ struct TestMDRange_6D { static void test_reduce6(const int N0, const int N1, const int N2, const int N3, const int N4, const int N5) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -2812,13 +2787,12 @@ struct TestMDRange_6D { double sum = 0.0; parallel_reduce( range, - KOKKOS_LAMBDA(const int i, const int j, const int k, const int l, - const int m, const int n, + KOKKOS_LAMBDA(const int /*i*/, const int /*j*/, const int /*k*/, + const int /*l*/, const int /*m*/, const int /*n*/, double &lsum) { lsum += 1.0; }, sum); ASSERT_EQ(sum, N0 * N1 * N2 * N3 * N4 * N5); } -#endif #endif { @@ -2914,7 +2888,6 @@ struct TestMDRange_6D { // Test Min reducer with lambda #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -2948,7 +2921,6 @@ struct TestMDRange_6D { ASSERT_EQ(min, 64.0); } -#endif #endif // Tagged operator test @@ -3001,7 +2973,6 @@ struct TestMDRange_6D { static void test_for6(const int N0, const int N1, const int N2, const int N3, const int N4, const int N5) { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) { typedef typename Kokkos::MDRangePolicy, Kokkos::IndexType > @@ -3051,7 +3022,6 @@ struct TestMDRange_6D { ASSERT_EQ(counter, 0); } -#endif #endif { diff --git a/core/unit_test/TestMemoryPool.hpp b/core/unit_test/TestMemoryPool.hpp index bc78a2908ed..cc18a903051 100644 --- a/core/unit_test/TestMemoryPool.hpp +++ b/core/unit_test/TestMemoryPool.hpp @@ -162,10 +162,10 @@ void test_host_memory_pool_stats() { // Aborts because exceeds max block size: // void * p2048 = pool.allocate(2048); - ASSERT_NE(p0064, (void*)0); - ASSERT_NE(p0128, (void*)0); - ASSERT_NE(p0256, (void*)0); - ASSERT_NE(p1024, (void*)0); + ASSERT_NE(p0064, nullptr); + ASSERT_NE(p0128, nullptr); + ASSERT_NE(p0256, nullptr); + ASSERT_NE(p1024, nullptr); pool.deallocate(p0064, 64); pool.deallocate(p0128, 128); @@ -475,17 +475,15 @@ void test_memory_pool_corners(const bool print_statistics, template struct TestMemoryPoolHuge { - TestMemoryPoolHuge() {} - enum : size_t { num_superblock = 0 }; using value_type = long; KOKKOS_INLINE_FUNCTION - void operator()(int i, long& err) const noexcept {} + void operator()(int /*i*/, long& /*err*/) const noexcept {} KOKKOS_INLINE_FUNCTION - void operator()(int i) const noexcept {} + void operator()(int /*i*/) const noexcept {} }; template diff --git a/core/unit_test/TestRange.hpp b/core/unit_test/TestRange.hpp index 1fd6a508f78..9bd13ad2395 100644 --- a/core/unit_test/TestRange.hpp +++ b/core/unit_test/TestRange.hpp @@ -54,9 +54,10 @@ template struct TestRange { typedef int value_type; ///< typedef required for the parallel_reduce - typedef Kokkos::View view_type; + typedef Kokkos::View view_type; view_type m_flags; + view_type result_view; struct VerifyInitTag {}; struct ResetTag {}; @@ -65,9 +66,19 @@ struct TestRange { struct VerifyOffsetTag {}; int N; +#ifndef KOKKOS_WORKAROUND_OPENMPTARGET_GCC static const int offset = 13; +#else + int offset; +#endif TestRange(const size_t N_) - : m_flags(Kokkos::ViewAllocateWithoutInitializing("flags"), N_), N(N_) {} + : m_flags(Kokkos::ViewAllocateWithoutInitializing("flags"), N_), + result_view(Kokkos::ViewAllocateWithoutInitializing("results"), N_), + N(N_) { +#ifdef KOKKOS_WORKAROUND_OPENMPTARGET_GCC + offset = 13; +#endif + } void test_for() { typename view_type::HostMirror host_flags = @@ -185,7 +196,7 @@ struct TestRange { //---------------------------------------- void test_reduce() { - int total = 0; + value_type total = 0; Kokkos::parallel_for(Kokkos::RangePolicy(0, N), *this); @@ -220,15 +231,31 @@ struct TestRange { Kokkos::parallel_for(Kokkos::RangePolicy(0, N), *this); + auto check_scan_results = [&]() { + auto const host_mirror = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), result_view); + for (int i = 0; i < N; ++i) { + if (((i + 1) * i) / 2 != host_mirror(i)) { + std::cout << "Error at " << i << std::endl; + EXPECT_EQ(size_t(((i + 1) * i) / 2), size_t(host_mirror(i))); + } + } + }; + Kokkos::parallel_scan( "TestKernelScan", Kokkos::RangePolicy(0, N), *this); - int total = 0; + check_scan_results(); + + value_type total = 0; Kokkos::parallel_scan( "TestKernelScanWithTotal", Kokkos::RangePolicy(0, N), *this, total); + + check_scan_results(); + ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total)); // sum( 0 .. N-1 ) } @@ -239,16 +266,16 @@ struct TestRange { if (final) { if (update != (i * (i + 1)) / 2) { - printf("TestRange::test_scan error %d : %d != %d\n", i, - (i * (i + 1)) / 2, m_flags(i)); + printf("TestRange::test_scan error (%d,%d) : %d != %d\n", i, m_flags(i), + (i * (i + 1)) / 2, update); } + result_view(i) = update; } } void test_dynamic_policy() { - auto const N_no_implicit_capture = N; #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) + auto const N_no_implicit_capture = N; typedef Kokkos::RangePolicy > policy_t; @@ -273,7 +300,7 @@ struct TestRange { int error = 0; Kokkos::parallel_reduce( Kokkos::RangePolicy(0, N), - KOKKOS_LAMBDA(const int &i, int &lsum) { + KOKKOS_LAMBDA(const int &i, value_type &lsum) { lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000)); }, error); @@ -300,10 +327,10 @@ struct TestRange { count("Count", ExecSpace::concurrency()); Kokkos::View a("A", N); - int sum = 0; + value_type sum = 0; Kokkos::parallel_reduce( policy_t(0, N), - KOKKOS_LAMBDA(const int &i, int &lsum) { + KOKKOS_LAMBDA(const int &i, value_type &lsum) { for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000); k++) { a(i)++; @@ -321,7 +348,7 @@ struct TestRange { int error = 0; Kokkos::parallel_reduce( Kokkos::RangePolicy(0, N), - KOKKOS_LAMBDA(const int &i, int &lsum) { + KOKKOS_LAMBDA(const int &i, value_type &lsum) { lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000)); }, error); @@ -342,7 +369,6 @@ struct TestRange { //} } } -#endif #endif } }; @@ -417,7 +443,7 @@ TEST(TEST_CATEGORY, range_scan) { TestRange > f(0); f.test_scan(); } -#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_ROCM) +#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) { TestRange > f(0); f.test_dynamic_policy(); @@ -432,7 +458,7 @@ TEST(TEST_CATEGORY, range_scan) { TestRange > f(3); f.test_scan(); } -#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_ROCM) +#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) { TestRange > f(3); f.test_dynamic_policy(); @@ -447,7 +473,7 @@ TEST(TEST_CATEGORY, range_scan) { TestRange > f(1001); f.test_scan(); } -#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_ROCM) +#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) { TestRange > f(1001); f.test_dynamic_policy(); diff --git a/core/unit_test/TestRangeRequire.hpp b/core/unit_test/TestRangeRequire.hpp new file mode 100644 index 00000000000..a75af0b95b8 --- /dev/null +++ b/core/unit_test/TestRangeRequire.hpp @@ -0,0 +1,518 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +// This file is largely duplicating TestRange.hpp but it applies +// Kokkos::Experimental require at every place where a parallel +// operation is executed. + +namespace Test { + +namespace { + +template +struct TestRangeRequire { + typedef int value_type; ///< typedef required for the parallel_reduce + + typedef Kokkos::View view_type; + + view_type m_flags; + + struct VerifyInitTag {}; + struct ResetTag {}; + struct VerifyResetTag {}; + struct OffsetTag {}; + struct VerifyOffsetTag {}; + + int N; + static const int offset = 13; + TestRangeRequire(const size_t N_) + : m_flags(Kokkos::ViewAllocateWithoutInitializing("flags"), N_), N(N_) {} + + void test_for() { + typename view_type::HostMirror host_flags = + Kokkos::create_mirror_view(m_flags); + + Kokkos::parallel_for( + Kokkos::Experimental::require( + Kokkos::RangePolicy(0, N), Property()), + *this); + +#if defined(KOKKOS_ENABLE_PROFILING) + { + typedef TestRangeRequire ThisType; + std::string label("parallel_for"); + Kokkos::Impl::ParallelConstructName pcn(label); + ASSERT_EQ(pcn.get(), label); + std::string empty_label(""); + Kokkos::Impl::ParallelConstructName empty_pcn( + empty_label); + ASSERT_EQ(empty_pcn.get(), typeid(ThisType).name()); + } +#endif + + Kokkos::parallel_for( + Kokkos::Experimental::require( + Kokkos::RangePolicy(0, N), + Property()), + *this); + +#if defined(KOKKOS_ENABLE_PROFILING) + { + typedef TestRangeRequire ThisType; + std::string label("parallel_for"); + Kokkos::Impl::ParallelConstructName pcn(label); + ASSERT_EQ(pcn.get(), label); + std::string empty_label(""); + Kokkos::Impl::ParallelConstructName empty_pcn( + empty_label); + ASSERT_EQ(empty_pcn.get(), std::string(typeid(ThisType).name()) + "/" + + typeid(VerifyInitTag).name()); + } +#endif + + Kokkos::deep_copy(host_flags, m_flags); + + int error_count = 0; + for (int i = 0; i < N; ++i) { + if (int(i) != host_flags(i)) ++error_count; + } + ASSERT_EQ(error_count, int(0)); + + Kokkos::parallel_for( + Kokkos::Experimental::require( + Kokkos::RangePolicy(0, N), + Property()), + *this); + Kokkos::parallel_for( + std::string("TestKernelFor"), + Kokkos::Experimental::require( + Kokkos::RangePolicy(0, N), + Property()), + *this); + + Kokkos::deep_copy(host_flags, m_flags); + + error_count = 0; + for (int i = 0; i < N; ++i) { + if (int(2 * i) != host_flags(i)) ++error_count; + } + ASSERT_EQ(error_count, int(0)); + + Kokkos::parallel_for( + Kokkos::Experimental::require( + Kokkos::RangePolicy(offset, + N + offset), + Property()), + *this); + Kokkos::parallel_for( + std::string("TestKernelFor"), + Kokkos::Experimental::require( + Kokkos::RangePolicy, + VerifyOffsetTag>(0, N), + Property()), + *this); + + Kokkos::deep_copy(host_flags, m_flags); + + error_count = 0; + for (int i = 0; i < N; ++i) { + if (i + offset != host_flags(i)) ++error_count; + } + ASSERT_EQ(error_count, int(0)); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { m_flags(i) = i; } + + KOKKOS_INLINE_FUNCTION + void operator()(const VerifyInitTag &, const int i) const { + if (i != m_flags(i)) { + printf("TestRangeRequire::test_for error at %d != %d\n", i, m_flags(i)); + } + } + + KOKKOS_INLINE_FUNCTION + void operator()(const ResetTag &, const int i) const { + m_flags(i) = 2 * m_flags(i); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const VerifyResetTag &, const int i) const { + if (2 * i != m_flags(i)) { + printf("TestRangeRequire::test_for error at %d != %d\n", i, m_flags(i)); + } + } + + KOKKOS_INLINE_FUNCTION + void operator()(const OffsetTag &, const int i) const { + m_flags(i - offset) = i; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const VerifyOffsetTag &, const int i) const { + if (i + offset != m_flags(i)) { + printf("TestRangeRequire::test_for error at %d != %d\n", i + offset, + m_flags(i)); + } + } + + //---------------------------------------- + + void test_reduce() { + int total = 0; + + Kokkos::parallel_for( + Kokkos::Experimental::require( + Kokkos::RangePolicy(0, N), Property()), + *this); + + Kokkos::parallel_reduce( + "TestKernelReduce", + Kokkos::Experimental::require( + Kokkos::RangePolicy(0, N), Property()), + *this, total); + // sum( 0 .. N-1 ) + ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total)); + + Kokkos::parallel_reduce( + Kokkos::Experimental::require( + Kokkos::RangePolicy(offset, + N + offset), + Property()), + *this, total); + // sum( 1 .. N ) + ASSERT_EQ(size_t((N) * (N + 1) / 2), size_t(total)); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const int i, value_type &update) const { + update += m_flags(i); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const OffsetTag &, const int i, value_type &update) const { + update += 1 + m_flags(i - offset); + } + + //---------------------------------------- + + void test_scan() { + Kokkos::parallel_for(Kokkos::RangePolicy(0, N), + *this); + + Kokkos::parallel_scan( + "TestKernelScan", + Kokkos::RangePolicy(0, N), *this); + + int total = 0; + Kokkos::parallel_scan( + "TestKernelScanWithTotal", + Kokkos::RangePolicy(0, N), *this, + total); + ASSERT_EQ(size_t((N - 1) * (N) / 2), size_t(total)); // sum( 0 .. N-1 ) + } + + KOKKOS_INLINE_FUNCTION + void operator()(const OffsetTag &, const int i, value_type &update, + bool final) const { + update += m_flags(i); + + if (final) { + if (update != (i * (i + 1)) / 2) { + printf("TestRangeRequire::test_scan error %d : %d != %d\n", i, + (i * (i + 1)) / 2, m_flags(i)); + } + } + } + + void test_dynamic_policy() { +#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + auto const N_no_implicit_capture = N; + typedef Kokkos::RangePolicy > + policy_t; + + { + Kokkos::View > + count("Count", ExecSpace::concurrency()); + Kokkos::View a("A", N); + + Kokkos::parallel_for( + policy_t(0, N), KOKKOS_LAMBDA(const int &i) { + for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000); + k++) { + a(i)++; + } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + count(ExecSpace::hardware_thread_id())++; +#else + count( ExecSpace::impl_hardware_thread_id() )++; +#endif + }); + + int error = 0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA(const int &i, int &lsum) { + lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000)); + }, + error); + ASSERT_EQ(error, 0); + + if ((ExecSpace::concurrency() > (int)1) && + (N > static_cast(4 * ExecSpace::concurrency()))) { + size_t min = N; + size_t max = 0; + for (int t = 0; t < ExecSpace::concurrency(); t++) { + if (count(t) < min) min = count(t); + if (count(t) > max) max = count(t); + } + ASSERT_TRUE(min < max); + + // if ( ExecSpace::concurrency() > 2 ) { + // ASSERT_TRUE( 2 * min < max ); + //} + } + } + + { + Kokkos::View > + count("Count", ExecSpace::concurrency()); + Kokkos::View a("A", N); + + int sum = 0; + Kokkos::parallel_reduce( + policy_t(0, N), + KOKKOS_LAMBDA(const int &i, int &lsum) { + for (int k = 0; k < (i < N_no_implicit_capture / 2 ? 1 : 10000); + k++) { + a(i)++; + } +#ifdef KOKKOS_ENABLE_DEPRECATED_CODE + count(ExecSpace::hardware_thread_id())++; +#else + count(ExecSpace::impl_hardware_thread_id())++; +#endif + lsum++; + }, + sum); + ASSERT_EQ(sum, N); + + int error = 0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA(const int &i, int &lsum) { + lsum += (a(i) != (i < N_no_implicit_capture / 2 ? 1 : 10000)); + }, + error); + ASSERT_EQ(error, 0); + + if ((ExecSpace::concurrency() > (int)1) && + (N > static_cast(4 * ExecSpace::concurrency()))) { + size_t min = N; + size_t max = 0; + for (int t = 0; t < ExecSpace::concurrency(); t++) { + if (count(t) < min) min = count(t); + if (count(t) > max) max = count(t); + } + ASSERT_TRUE(min < max); + + // if ( ExecSpace::concurrency() > 2 ) { + // ASSERT_TRUE( 2 * min < max ); + //} + } + } +#endif + } +}; + +} // namespace + +TEST(TEST_CATEGORY, range_for_require) { + using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t; + { + TestRangeRequire, Property> + f(0); + f.test_for(); + } + { + TestRangeRequire, + Property> + f(0); + f.test_for(); + } + + { + TestRangeRequire, Property> + f(2); + f.test_for(); + } + { + TestRangeRequire, + Property> + f(3); + f.test_for(); + } + + { + TestRangeRequire, Property> + f(1000); + f.test_for(); + } + { + TestRangeRequire, + Property> + f(1001); + f.test_for(); + } +} + +TEST(TEST_CATEGORY, range_reduce_require) { + using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t; + { + TestRangeRequire, Property> + f(0); + f.test_reduce(); + } + { + TestRangeRequire, + Property> + f(0); + f.test_reduce(); + } + + { + TestRangeRequire, Property> + f(2); + f.test_reduce(); + } + { + TestRangeRequire, + Property> + f(3); + f.test_reduce(); + } + + { + TestRangeRequire, Property> + f(1000); + f.test_reduce(); + } + { + TestRangeRequire, + Property> + f(1001); + f.test_reduce(); + } +} + +#ifndef KOKKOS_ENABLE_OPENMPTARGET +TEST(TEST_CATEGORY, range_scan_require) { + using Property = Kokkos::Experimental::WorkItemProperty::HintLightWeight_t; + { + TestRangeRequire, Property> + f(0); + f.test_scan(); + } + { + TestRangeRequire, + Property> + f(0); + f.test_scan(); + } +#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) + { + TestRangeRequire, + Property> + f(0); + f.test_dynamic_policy(); + } +#endif + + { + TestRangeRequire, Property> + f(2); + f.test_scan(); + } + { + TestRangeRequire, + Property> + f(3); + f.test_scan(); + } +#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) + { + TestRangeRequire, + Property> + f(3); + f.test_dynamic_policy(); + } +#endif + + { + TestRangeRequire, Property> + f(1000); + f.test_scan(); + } + { + TestRangeRequire, + Property> + f(1001); + f.test_scan(); + } +#if !defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_HIP) + { + TestRangeRequire, + Property> + f(1001); + f.test_dynamic_policy(); + } +#endif +} +#endif +} // namespace Test diff --git a/core/unit_test/TestReduce.hpp b/core/unit_test/TestReduce.hpp index 6c518429a05..d63d5e8d4a7 100644 --- a/core/unit_test/TestReduce.hpp +++ b/core/unit_test/TestReduce.hpp @@ -95,12 +95,12 @@ class ReduceFunctor { }; template -class ReduceFunctorFinal : public ReduceFunctor { +class ReduceFunctorFinal : public ReduceFunctor { public: - typedef typename ReduceFunctor::value_type value_type; + typedef typename ReduceFunctor::value_type value_type; KOKKOS_INLINE_FUNCTION - ReduceFunctorFinal(const size_t n) : ReduceFunctor(n) {} + ReduceFunctorFinal(const size_t n) : ReduceFunctor(n) {} KOKKOS_INLINE_FUNCTION void final(value_type& dst) const { @@ -198,11 +198,11 @@ class RuntimeReduceMinMax { template class RuntimeReduceFunctorFinal - : public RuntimeReduceFunctor { + : public RuntimeReduceFunctor { public: - typedef RuntimeReduceFunctor base_type; + typedef RuntimeReduceFunctor base_type; typedef typename base_type::value_type value_type; - typedef long scalar_type; + typedef int64_t scalar_type; RuntimeReduceFunctorFinal(const size_t theNwork, const size_t count) : base_type(theNwork, count) {} @@ -237,9 +237,8 @@ class TestReduce { value_type result[Repeat]; - const unsigned long nw = nwork; - const unsigned long nsum = - nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); + const uint64_t nw = nwork; + const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); for (unsigned i = 0; i < Repeat; ++i) { Kokkos::parallel_reduce(nwork, functor_type(nwork), result[i]); @@ -247,7 +246,7 @@ class TestReduce { for (unsigned i = 0; i < Repeat; ++i) { for (unsigned j = 0; j < Count; ++j) { - const unsigned long correct = 0 == j % 3 ? nw : nsum; + const uint64_t correct = 0 == j % 3 ? nw : nsum; ASSERT_EQ((ScalarType)correct, result[i].value[j]); } } @@ -262,9 +261,8 @@ class TestReduce { value_type result[Repeat]; - const unsigned long nw = nwork; - const unsigned long nsum = - nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); + const uint64_t nw = nwork; + const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); for (unsigned i = 0; i < Repeat; ++i) { if (i % 2 == 0) { @@ -277,7 +275,7 @@ class TestReduce { for (unsigned i = 0; i < Repeat; ++i) { for (unsigned j = 0; j < Count; ++j) { - const unsigned long correct = 0 == j % 3 ? nw : nsum; + const uint64_t correct = 0 == j % 3 ? nw : nsum; ASSERT_EQ((ScalarType)correct, -result[i].value[j]); } } @@ -305,9 +303,8 @@ class TestReduceDynamic { ScalarType result[Repeat][Count]; - const unsigned long nw = nwork; - const unsigned long nsum = - nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); + const uint64_t nw = nwork; + const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); for (unsigned i = 0; i < Repeat; ++i) { if (i % 2 == 0) { @@ -320,7 +317,7 @@ class TestReduceDynamic { for (unsigned i = 0; i < Repeat; ++i) { for (unsigned j = 0; j < Count; ++j) { - const unsigned long correct = 0 == j % 3 ? nw : nsum; + const uint64_t correct = 0 == j % 3 ? nw : nsum; ASSERT_EQ((ScalarType)correct, result[i][j]); } } @@ -351,7 +348,7 @@ class TestReduceDynamic { const ScalarType correct = (j % 2) ? amax : amin; ASSERT_EQ((ScalarType)correct, result[i][j]); } else { - const unsigned long correct = j % 2 ? 1 : nwork; + const uint64_t correct = j % 2 ? 1 : nwork; ASSERT_EQ((ScalarType)correct, result[i][j]); } } @@ -366,9 +363,8 @@ class TestReduceDynamic { typename functor_type::scalar_type result[Repeat][Count]; - const unsigned long nw = nwork; - const unsigned long nsum = - nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); + const uint64_t nw = nwork; + const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); for (unsigned i = 0; i < Repeat; ++i) { if (i % 2 == 0) { @@ -381,7 +377,7 @@ class TestReduceDynamic { for (unsigned i = 0; i < Repeat; ++i) { for (unsigned j = 0; j < Count; ++j) { - const unsigned long correct = 0 == j % 3 ? nw : nsum; + const uint64_t correct = 0 == j % 3 ? nw : nsum; ASSERT_EQ((ScalarType)correct, -result[i][j]); } } @@ -405,9 +401,8 @@ class TestReduceDynamicView { const unsigned CountLimit = 23; - const unsigned long nw = nwork; - const unsigned long nsum = - nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); + const uint64_t nw = nwork; + const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); for (unsigned count = 0; count < CountLimit; ++count) { result_type result("result", count); @@ -425,7 +420,7 @@ class TestReduceDynamicView { } for (unsigned j = 0; j < count; ++j) { - const unsigned long correct = 0 == j % 3 ? nw : nsum; + const uint64_t correct = 0 == j % 3 ? nw : nsum; ASSERT_EQ(host_result(j), (ScalarType)correct); host_result(j) = 0; } @@ -435,9 +430,9 @@ class TestReduceDynamicView { } // namespace -TEST(TEST_CATEGORY, long_reduce) { - TestReduce(0); - TestReduce(1000000); +TEST(TEST_CATEGORY, int64_t_reduce) { + TestReduce(0); + TestReduce(1000000); } TEST(TEST_CATEGORY, double_reduce) { @@ -445,9 +440,9 @@ TEST(TEST_CATEGORY, double_reduce) { TestReduce(1000000); } -TEST(TEST_CATEGORY, long_reduce_dynamic) { - TestReduceDynamic(0); - TestReduceDynamic(1000000); +TEST(TEST_CATEGORY, int64_t_reduce_dynamic) { + TestReduceDynamic(0); + TestReduceDynamic(1000000); } TEST(TEST_CATEGORY, double_reduce_dynamic) { @@ -455,9 +450,9 @@ TEST(TEST_CATEGORY, double_reduce_dynamic) { TestReduceDynamic(1000000); } -TEST(TEST_CATEGORY, long_reduce_dynamic_view) { - TestReduceDynamicView(0); - TestReduceDynamicView(1000000); +TEST(TEST_CATEGORY, int64_t_reduce_dynamic_view) { + TestReduceDynamicView(0); + TestReduceDynamicView(1000000); } } // namespace Test diff --git a/core/unit_test/TestReduceCombinatorical.hpp b/core/unit_test/TestReduceCombinatorical.hpp index a03dbd11418..fe85f360cb3 100644 --- a/core/unit_test/TestReduceCombinatorical.hpp +++ b/core/unit_test/TestReduceCombinatorical.hpp @@ -467,10 +467,10 @@ struct TestReduceCombinatoricalInstantiation { } template - static void AddLambdaRange(Kokkos::InvalidType, Args... args) {} + static void AddLambdaRange(Kokkos::InvalidType, Args... /*args*/) {} template - static void AddLambdaTeam(Kokkos::InvalidType, Args... args) {} + static void AddLambdaTeam(Kokkos::InvalidType, Args... /*args*/) {} template static void AddFunctor(Args... args) { diff --git a/core/unit_test/TestResize.hpp b/core/unit_test/TestResize.hpp index 14d12955381..32a85f03e9a 100644 --- a/core/unit_test/TestResize.hpp +++ b/core/unit_test/TestResize.hpp @@ -71,7 +71,7 @@ void impl_testResize() { typedef Kokkos::View view_type; view_type view_1d("view_1d", sizes[0]); const int* oldPointer = view_1d.data(); - EXPECT_TRUE(oldPointer != NULL); + EXPECT_TRUE(oldPointer != nullptr); resize_dispatch(Tag{}, view_1d, sizes[0]); const int* newPointer = view_1d.data(); EXPECT_TRUE(oldPointer == newPointer); @@ -80,7 +80,7 @@ void impl_testResize() { typedef Kokkos::View view_type; view_type view_2d("view_2d", sizes[0], sizes[1]); const int* oldPointer = view_2d.data(); - EXPECT_TRUE(oldPointer != NULL); + EXPECT_TRUE(oldPointer != nullptr); resize_dispatch(Tag{}, view_2d, sizes[0], sizes[1]); const int* newPointer = view_2d.data(); EXPECT_TRUE(oldPointer == newPointer); @@ -89,7 +89,7 @@ void impl_testResize() { typedef Kokkos::View view_type; view_type view_3d("view_3d", sizes[0], sizes[1], sizes[2]); const int* oldPointer = view_3d.data(); - EXPECT_TRUE(oldPointer != NULL); + EXPECT_TRUE(oldPointer != nullptr); resize_dispatch(Tag{}, view_3d, sizes[0], sizes[1], sizes[2]); const int* newPointer = view_3d.data(); EXPECT_TRUE(oldPointer == newPointer); @@ -98,7 +98,7 @@ void impl_testResize() { typedef Kokkos::View view_type; view_type view_4d("view_4d", sizes[0], sizes[1], sizes[2], sizes[3]); const int* oldPointer = view_4d.data(); - EXPECT_TRUE(oldPointer != NULL); + EXPECT_TRUE(oldPointer != nullptr); resize_dispatch(Tag{}, view_4d, sizes[0], sizes[1], sizes[2], sizes[3]); const int* newPointer = view_4d.data(); EXPECT_TRUE(oldPointer == newPointer); @@ -108,7 +108,7 @@ void impl_testResize() { view_type view_5d("view_5d", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4]); const int* oldPointer = view_5d.data(); - EXPECT_TRUE(oldPointer != NULL); + EXPECT_TRUE(oldPointer != nullptr); resize_dispatch(Tag{}, view_5d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4]); const int* newPointer = view_5d.data(); @@ -119,7 +119,7 @@ void impl_testResize() { view_type view_6d("view_6d", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5]); const int* oldPointer = view_6d.data(); - EXPECT_TRUE(oldPointer != NULL); + EXPECT_TRUE(oldPointer != nullptr); resize_dispatch(Tag{}, view_6d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5]); const int* newPointer = view_6d.data(); @@ -130,7 +130,7 @@ void impl_testResize() { view_type view_7d("view_7d", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6]); const int* oldPointer = view_7d.data(); - EXPECT_TRUE(oldPointer != NULL); + EXPECT_TRUE(oldPointer != nullptr); resize_dispatch(Tag{}, view_7d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6]); const int* newPointer = view_7d.data(); @@ -141,7 +141,7 @@ void impl_testResize() { view_type view_8d("view_8d", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6], sizes[7]); const int* oldPointer = view_8d.data(); - EXPECT_TRUE(oldPointer != NULL); + EXPECT_TRUE(oldPointer != nullptr); resize_dispatch(Tag{}, view_8d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6], sizes[7]); const int* newPointer = view_8d.data(); diff --git a/core/unit_test/TestScan.hpp b/core/unit_test/TestScan.hpp index 483f1868802..f7ebbb62a30 100644 --- a/core/unit_test/TestScan.hpp +++ b/core/unit_test/TestScan.hpp @@ -50,7 +50,7 @@ namespace Test { template struct TestScan { typedef Device execution_space; - typedef long int value_type; + typedef int64_t value_type; Kokkos::View > errors; @@ -98,10 +98,12 @@ struct TestScan { Kokkos::parallel_scan(N, *this); - long long int total = 0; + int64_t total = 0; Kokkos::parallel_scan(N, *this, total); - run_check(size_t((N + 1) * N / 2), size_t(total)); + // We can't return a value in a constructor so use a lambda as wrapper to + // ignore it. + [&] { ASSERT_EQ(size_t((N + 1) * N / 2), size_t(total)); }(); check_error(); } @@ -129,10 +131,6 @@ struct TestScan { (void)TestScan(i); } } - - void run_check(const size_t& expected, const size_t& actual) { - ASSERT_EQ(expected, actual); - } }; TEST(TEST_CATEGORY, scan) { diff --git a/core/unit_test/TestSharedAlloc.hpp b/core/unit_test/TestSharedAlloc.hpp index d284b7d61ee..1b67e29d709 100644 --- a/core/unit_test/TestSharedAlloc.hpp +++ b/core/unit_test/TestSharedAlloc.hpp @@ -116,7 +116,7 @@ void test_shared_alloc() { #endif Kokkos::parallel_for(range, [=](size_t i) { - while (0 != + while (nullptr != (r[i] = static_cast(RecordBase::decrement(r[i])))) { #ifdef KOKKOS_DEBUG if (r[i]->use_count() == 1) RecordBase::is_sane(r[i]); @@ -157,7 +157,7 @@ void test_shared_alloc() { #endif Kokkos::parallel_for(range, [=](size_t i) { - while (0 != + while (nullptr != (r[i] = static_cast(RecordBase::decrement(r[i])))) { #ifdef KOKKOS_DEBUG if (r[i]->use_count() == 1) RecordBase::is_sane(r[i]); diff --git a/core/unit_test/TestTaskScheduler.hpp b/core/unit_test/TestTaskScheduler.hpp index 9f593a00fda..ebfdcf1df3f 100644 --- a/core/unit_test/TestTaskScheduler.hpp +++ b/core/unit_test/TestTaskScheduler.hpp @@ -843,11 +843,14 @@ struct TestMultipleDependence { #undef TEST_SCHEDULER #undef TEST_SCHEDULER_SUFFIX +// KOKKOS WORKAROUND WIN32: Theses tests hang with msvc +#ifndef _WIN32 #define TEST_SCHEDULER_SUFFIX _chase_lev #define TEST_SCHEDULER Kokkos::ChaseLevTaskScheduler #include "TestTaskScheduler_single.hpp" #undef TEST_SCHEDULER #undef TEST_SCHEDULER_SUFFIX +#endif #if 0 #define TEST_SCHEDULER_SUFFIX _fixed_mempool diff --git a/core/unit_test/TestTeam.hpp b/core/unit_test/TestTeam.hpp index eebf78f7b6f..d1ee0039696 100644 --- a/core/unit_test/TestTeam.hpp +++ b/core/unit_test/TestTeam.hpp @@ -100,7 +100,7 @@ struct TestTeamPolicy { struct NoOpTag {}; KOKKOS_INLINE_FUNCTION - void operator()(const NoOpTag &, const team_member &member) const {} + void operator()(const NoOpTag &, const team_member & /*member*/) const {} static void test_small_league_size() { int bs = 8; // batch size (number of elements per batch) @@ -139,7 +139,7 @@ struct TestTeamPolicy { struct ReduceTag {}; - typedef long value_type; + typedef int64_t value_type; KOKKOS_INLINE_FUNCTION void operator()(const team_member &member, value_type &update) const { @@ -164,9 +164,9 @@ struct TestTeamPolicy { policy_type_reduce(league_size, 1) .team_size_max(functor, Kokkos::ParallelReduceTag()); - const long N = team_size * league_size; + const int64_t N = team_size * league_size; - long total = 0; + int64_t total = 0; Kokkos::parallel_reduce(policy_type(league_size, team_size), functor, total); @@ -263,9 +263,8 @@ class TestReduceTeam { value_type result[Repeat]; - const unsigned long nw = nwork; - const unsigned long nsum = - nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); + const uint64_t nw = nwork; + const uint64_t nsum = nw % 2 ? nw * ((nw + 1) / 2) : (nw / 2) * (nw + 1); policy_type team_exec(nw, 1); @@ -284,7 +283,7 @@ class TestReduceTeam { for (unsigned i = 0; i < Repeat; ++i) { for (unsigned j = 0; j < Count; ++j) { - const unsigned long correct = 0 == j % 3 ? nw : nsum; + const uint64_t correct = 0 == j % 3 ? nw : nsum; ASSERT_EQ((ScalarType)correct, result[i].value[j]); } } @@ -302,7 +301,7 @@ class ScanTeamFunctor { public: typedef DeviceType execution_space; typedef Kokkos::TeamPolicy policy_type; - typedef long int value_type; + typedef int64_t value_type; Kokkos::View accum; Kokkos::View total; @@ -319,7 +318,7 @@ class ScanTeamFunctor { } struct JoinMax { - typedef long int value_type; + typedef int64_t value_type; KOKKOS_INLINE_FUNCTION void join(value_type volatile &dst, @@ -332,31 +331,31 @@ class ScanTeamFunctor { void operator()(const typename policy_type::member_type ind, value_type &error) const { if (0 == ind.league_rank() && 0 == ind.team_rank()) { - const long int thread_count = ind.league_size() * ind.team_size(); - total() = (thread_count * (thread_count + 1)) / 2; + const int64_t thread_count = ind.league_size() * ind.team_size(); + total() = (thread_count * (thread_count + 1)) / 2; } // Team max: - int long m = (long int)(ind.league_rank() + ind.team_rank()); - ind.team_reduce(Kokkos::Max(m)); + int64_t m = (int64_t)(ind.league_rank() + ind.team_rank()); + ind.team_reduce(Kokkos::Max(m)); if (m != ind.league_rank() + (ind.team_size() - 1)) { printf( "ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != " "reduce_max(%ld)\n", ind.league_rank(), ind.team_rank(), ind.league_size(), - ind.team_size(), - (long int)(ind.league_rank() + (ind.team_size() - 1)), m); + ind.team_size(), (int64_t)(ind.league_rank() + (ind.team_size() - 1)), + m); } // Scan: - const long int answer = (ind.league_rank() + 1) * ind.team_rank() + - (ind.team_rank() * (ind.team_rank() + 1)) / 2; + const int64_t answer = (ind.league_rank() + 1) * ind.team_rank() + + (ind.team_rank() * (ind.team_rank() + 1)) / 2; - const long int result = + const int64_t result = ind.team_scan(ind.league_rank() + 1 + ind.team_rank() + 1); - const long int result2 = + const int64_t result2 = ind.team_scan(ind.league_rank() + 1 + ind.team_rank() + 1); if (answer != result || answer != result2) { @@ -369,7 +368,7 @@ class ScanTeamFunctor { error = 1; } - const long int thread_rank = + const int64_t thread_rank = ind.team_rank() + ind.team_size() * ind.league_rank(); ind.team_scan(1 + thread_rank, accum.data()); } @@ -379,14 +378,14 @@ template class TestScanTeam { public: typedef DeviceType execution_space; - typedef long int value_type; + typedef int64_t value_type; typedef Kokkos::TeamPolicy policy_type; typedef Test::ScanTeamFunctor functor_type; TestScanTeam(const size_t nteam) { run_test(nteam); } void run_test(const size_t nteam) { - typedef Kokkos::View + typedef Kokkos::View result_type; const unsigned REPEAT = 100000; @@ -405,9 +404,9 @@ class TestScanTeam { nteam, team_exec.team_size_max(functor, Kokkos::ParallelReduceTag())); for (unsigned i = 0; i < Repeat; ++i) { - long int accum = 0; - long int total = 0; - long int error = 0; + int64_t accum = 0; + int64_t total = 0; + int64_t error = 0; Kokkos::deep_copy(functor.accum, total); Kokkos::parallel_reduce(team_exec, functor, result_type(&error)); @@ -445,7 +444,7 @@ struct SharedTeamFunctor { shared_int_array_type; // Tell how much shared memory will be required by this functor. - inline unsigned team_shmem_size(int team_size) const { + inline unsigned team_shmem_size(int /*team_size*/) const { return shared_int_array_type::shmem_size(SHARED_COUNT) + shared_int_array_type::shmem_size(SHARED_COUNT); } @@ -462,7 +461,7 @@ struct SharedTeamFunctor { "member( %d/%d , %d/%d ) Failed to allocate shared memory of size " "%lu\n", ind.league_rank(), ind.league_size(), ind.team_rank(), - ind.team_size(), static_cast(SHARED_COUNT)); + ind.team_size(), static_cast(SHARED_COUNT)); ++update; // Failure to allocate is an error. } else { @@ -523,7 +522,6 @@ struct TestSharedTeam { namespace Test { #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) template struct TestLambdaSharedTeam { TestLambdaSharedTeam() { run(); } @@ -568,7 +566,7 @@ struct TestLambdaSharedTeam { if ((shared_A.data() == nullptr && SHARED_COUNT > 0) || (shared_B.data() == nullptr && SHARED_COUNT > 0)) { printf("Failed to allocate shared memory of size %lu\n", - static_cast(SHARED_COUNT)); + static_cast(SHARED_COUNT)); ++update; // Failure to allocate is an error. } else { @@ -601,7 +599,6 @@ struct TestLambdaSharedTeam { } }; #endif -#endif } // namespace Test @@ -636,7 +633,7 @@ struct ScratchTeamFunctor { (scratch_A.data() == nullptr && SHARED_TEAM_COUNT > 0) || (scratch_B.data() == nullptr && SHARED_THREAD_COUNT > 0)) { printf("Failed to allocate shared memory of size %lu\n", - static_cast(SHARED_TEAM_COUNT)); + static_cast(SHARED_TEAM_COUNT)); ++update; // Failure to allocate is an error. } else { @@ -739,22 +736,22 @@ KOKKOS_INLINE_FUNCTION int test_team_mulit_level_scratch_loop_body( a_thread2(team.thread_scratch(0), 16); Kokkos::View > - b_team1(team.team_scratch(1), 128000); + b_team1(team.team_scratch(1), 12800); Kokkos::View > - b_thread1(team.thread_scratch(1), 16000); + b_thread1(team.thread_scratch(1), 1600); Kokkos::View > - b_team2(team.team_scratch(1), 128000); + b_team2(team.team_scratch(1), 12800); Kokkos::View > - b_thread2(team.thread_scratch(1), 16000); + b_thread2(team.thread_scratch(1), 1600); Kokkos::View > a_team3(team.team_scratch(0), 128); Kokkos::View > a_thread3(team.thread_scratch(0), 16); Kokkos::View > - b_team3(team.team_scratch(1), 128000); + b_team3(team.team_scratch(1), 12800); Kokkos::View > - b_thread3(team.thread_scratch(1), 16000); + b_thread3(team.thread_scratch(1), 1600); // The explicit types for 0 and 128 are here to test TeamThreadRange accepting // different types for begin and end. @@ -775,7 +772,7 @@ KOKKOS_INLINE_FUNCTION int test_team_mulit_level_scratch_loop_body( team.league_rank() * 100000; }); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0, 128000), + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 0, 12800), [&](const int &i) { b_team1(i) = 1000000 + i + team.league_rank() * 100000; b_team2(i) = 2000000 + i + team.league_rank() * 100000; @@ -783,7 +780,7 @@ KOKKOS_INLINE_FUNCTION int test_team_mulit_level_scratch_loop_body( }); team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, 16000), + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, 1600), [&](const int &i) { b_thread1(i) = 1000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; @@ -817,7 +814,7 @@ KOKKOS_INLINE_FUNCTION int test_team_mulit_level_scratch_loop_body( }); Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, 0, 128000), [&](const int &i) { + Kokkos::TeamThreadRange(team, 0, 12800), [&](const int &i) { if (b_team1(i) != 1000000 + i + team.league_rank() * 100000) error++; if (b_team2(i) != 2000000 + i + team.league_rank() * 100000) error++; if (b_team3(i) != 3000000 + i + team.league_rank() * 100000) error++; @@ -825,7 +822,7 @@ KOKKOS_INLINE_FUNCTION int test_team_mulit_level_scratch_loop_body( team.team_barrier(); Kokkos::parallel_for( - Kokkos::ThreadVectorRange(team, 16000), [&](const int &i) { + Kokkos::ThreadVectorRange(team, 1600), [&](const int &i) { if (b_thread1(i) != 1000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000) error++; @@ -879,11 +876,11 @@ struct ClassNoShmemSizeFunction { const int per_team1 = 3 * Kokkos::View< double *, ExecSpace, - Kokkos::MemoryTraits >::shmem_size(128000); + Kokkos::MemoryTraits >::shmem_size(12800); const int per_thread1 = 3 * Kokkos::View< double *, ExecSpace, - Kokkos::MemoryTraits >::shmem_size(16000); + Kokkos::MemoryTraits >::shmem_size(1600); int team_size = 8; if (team_size > ExecSpace::concurrency()) @@ -952,11 +949,11 @@ struct ClassWithShmemSizeFunction { const int per_team1 = 3 * Kokkos::View< double *, ExecSpace, - Kokkos::MemoryTraits >::shmem_size(128000); + Kokkos::MemoryTraits >::shmem_size(12800); const int per_thread1 = 3 * Kokkos::View< double *, ExecSpace, - Kokkos::MemoryTraits >::shmem_size(16000); + Kokkos::MemoryTraits >::shmem_size(1600); int team_size = 8; if (team_size > ExecSpace::concurrency()) @@ -1008,7 +1005,6 @@ struct ClassWithShmemSizeFunction { template void test_team_mulit_level_scratch_test_lambda() { #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) Kokkos::View > errors; Kokkos::View d_errors("Errors"); errors = d_errors; @@ -1023,13 +1019,13 @@ void test_team_mulit_level_scratch_test_lambda() { Kokkos::MemoryTraits >::shmem_size(16); const int per_team1 = - 3 * Kokkos::View< - double *, ExecSpace, - Kokkos::MemoryTraits >::shmem_size(128000); + 3 * + Kokkos::View >::shmem_size(12800); const int per_thread1 = 3 * Kokkos::View >::shmem_size(16000); + Kokkos::MemoryTraits >::shmem_size(1600); int team_size = 8; if (team_size > ExecSpace::concurrency()) @@ -1070,7 +1066,6 @@ void test_team_mulit_level_scratch_test_lambda() { error); ASSERT_EQ(error, 0); #endif -#endif } } // namespace Test @@ -1102,7 +1097,7 @@ struct TestShmemSize { TestShmemSize() { run(); } void run() { - typedef Kokkos::View view_type; + typedef Kokkos::View view_type; size_t d1 = 5; size_t d2 = 6; @@ -1110,7 +1105,7 @@ struct TestShmemSize { size_t size = view_type::shmem_size(d1, d2, d3); - ASSERT_EQ(size, (d1 * d2 * d3 + 1) * sizeof(long)); + ASSERT_EQ(size, (d1 * d2 * d3 + 1) * sizeof(int64_t)); test_layout_stride(); } @@ -1137,16 +1132,129 @@ namespace Test { namespace { -template -struct TestTeamBroadcast { - typedef typename Kokkos::TeamPolicy::member_type - team_member; +template +struct TestTeamBroadcast; + +template +struct TestTeamBroadcast< + ExecSpace, ScheduleType, T, + typename std::enable_if<(sizeof(T) == sizeof(char)), void>::type> { + using team_member = + typename Kokkos::TeamPolicy::member_type; + using memory_space = typename ExecSpace::memory_space; + using value_type = T; + + const value_type offset; - TestTeamBroadcast(const size_t league_size) {} + TestTeamBroadcast(const size_t /*league_size*/, const value_type os_) + : offset(os_) {} struct BroadcastTag {}; - typedef long value_type; + KOKKOS_INLINE_FUNCTION + void operator()(const team_member &teamMember, value_type &update) const { + int lid = teamMember.league_rank(); + int tid = teamMember.team_rank(); + int ts = teamMember.team_size(); + + value_type parUpdate = 0; + value_type value = (value_type)(tid % 0xFF) + offset; + + // broadcast boolean and value to team from source thread + teamMember.team_broadcast(value, lid % ts); + + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(teamMember, ts), + [&](const int /*j*/, value_type &teamUpdate) { teamUpdate |= value; }, + Kokkos::BOr(parUpdate)); + + if (teamMember.team_rank() == 0) update |= parUpdate; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const BroadcastTag &, const team_member &teamMember, + value_type &update) const { + int lid = teamMember.league_rank(); + int tid = teamMember.team_rank(); + int ts = teamMember.team_size(); + + value_type parUpdate = 0; + value_type value = (value_type)(tid % 0xFF) + offset; + + teamMember.team_broadcast([&](value_type &var) { var -= offset; }, value, + lid % ts); + + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(teamMember, ts), + [&](const int /*j*/, value_type &teamUpdate) { teamUpdate |= value; }, + Kokkos::BOr(parUpdate)); + + if (teamMember.team_rank() == 0) update |= parUpdate; + } + + static void test_teambroadcast(const size_t league_size, + const value_type off) { + TestTeamBroadcast functor(league_size, off); + + typedef Kokkos::TeamPolicy policy_type; + typedef Kokkos::TeamPolicy + policy_type_f; + + const int team_size = + policy_type_f(league_size, 1) + .team_size_max( + functor, + Kokkos:: + ParallelReduceTag()); // printf("team_size=%d\n",team_size); + + // team_broadcast with value + value_type total = 0; + + Kokkos::parallel_reduce(policy_type(league_size, team_size), functor, + Kokkos::BOr(total)); + + value_type expected_result = 0; + for (unsigned int i = 0; i < league_size; i++) { + value_type val = (value_type((i % team_size % 0xFF)) + off); + expected_result |= val; + } + ASSERT_EQ(expected_result, total); + // printf("team_broadcast with value --" + //"expected_result=%x," + //"total=%x\n",expected_result, total); + + // team_broadcast with function object + total = 0; + + Kokkos::parallel_reduce(policy_type_f(league_size, team_size), functor, + Kokkos::BOr(total)); + + expected_result = 0; + for (unsigned int i = 0; i < league_size; i++) { + value_type val = ((value_type)((i % team_size % 0xFF))); + expected_result |= val; + } + ASSERT_EQ(expected_result, total); + // printf("team_broadcast with function object --" + // "expected_result=%x," + // "total=%x\n",expected_result, total); + } +}; + +template +struct TestTeamBroadcast< + ExecSpace, ScheduleType, T, + typename std::enable_if<(sizeof(T) > sizeof(char)), void>::type> { + using team_member = + typename Kokkos::TeamPolicy::member_type; + using value_type = T; + + const value_type offset; + + TestTeamBroadcast(const size_t /*league_size*/, const value_type os_) + : offset(os_) {} + + struct BroadcastTag {}; KOKKOS_INLINE_FUNCTION void operator()(const team_member &teamMember, value_type &update) const { @@ -1155,16 +1263,26 @@ struct TestTeamBroadcast { int ts = teamMember.team_size(); value_type parUpdate = 0; - value_type value = tid * 3 + 1; + value_type value = (value_type)(tid * 3) + offset; + + // setValue is used to determine if the update should be + // performed at the bottom. The thread id must match the + // thread id used to broadcast the value. It is the + // thread id that matches the league rank mod team size + // this way each league rank will use a different thread id + // which is likely not 0 + bool setValue = ((lid % ts) == tid); + // broadcast boolean and value to team from source thread teamMember.team_broadcast(value, lid % ts); + teamMember.team_broadcast(setValue, lid % ts); Kokkos::parallel_reduce( Kokkos::TeamThreadRange(teamMember, ts), - [&](const int j, value_type &teamUpdate) { teamUpdate += value; }, + [&](const int /*j*/, value_type &teamUpdate) { teamUpdate += value; }, parUpdate); - if (teamMember.team_rank() == 0) update += parUpdate; + if (teamMember.team_rank() == 0 && setValue) update += parUpdate; } KOKKOS_INLINE_FUNCTION @@ -1175,21 +1293,53 @@ struct TestTeamBroadcast { int ts = teamMember.team_size(); value_type parUpdate = 0; - value_type value = tid * 3 + 1; + value_type value = (value_type)(tid * 3) + offset; + + // setValue is used to determine if the update should be + // performed at the bottom. The thread id must match the + // thread id used to broadcast the value. It is the + // thread id that matches the league rank mod team size + // this way each league rank will use a different thread id + // which is likely not 0. Note the logic is switched from + // above because the functor switches it back. + bool setValue = ((lid % ts) != tid); teamMember.team_broadcast([&](value_type &var) { var *= 2; }, value, lid % ts); + teamMember.team_broadcast([&](bool &bVar) { bVar = !bVar; }, setValue, + lid % ts); Kokkos::parallel_reduce( Kokkos::TeamThreadRange(teamMember, ts), - [&](const int j, value_type &teamUpdate) { teamUpdate += value; }, + [&](const int /*j*/, value_type &teamUpdate) { teamUpdate += value; }, parUpdate); - if (teamMember.team_rank() == 0) update += parUpdate; + if (teamMember.team_rank() == 0 && setValue) update += parUpdate; + } + + template + static inline + typename std::enable_if::value, void>::type + compare_test(ScalarType A, ScalarType B) { + if (std::is_same::value) { + ASSERT_DOUBLE_EQ((double)A, (double)B); + } else if (std::is_same::value) { + ASSERT_FLOAT_EQ((double)A, (double)B); + } else { + ASSERT_EQ(A, B); + } + } + + template + static inline + typename std::enable_if::value, void>::type + compare_test(ScalarType A, ScalarType B) { + ASSERT_EQ(A, B); } - static void test_teambroadcast(const size_t league_size) { - TestTeamBroadcast functor(league_size); + static void test_teambroadcast(const size_t league_size, + const value_type off) { + TestTeamBroadcast functor(league_size, off); typedef Kokkos::TeamPolicy policy_type; typedef Kokkos::TeamPolicy @@ -1203,22 +1353,23 @@ struct TestTeamBroadcast { ParallelReduceTag()); // printf("team_size=%d\n",team_size); // team_broadcast with value - long total = 0; + value_type total = 0; Kokkos::parallel_reduce(policy_type(league_size, team_size), functor, total); value_type expected_result = 0; for (unsigned int i = 0; i < league_size; i++) { - value_type val = ((i % team_size) * 3 + 1) * team_size; + value_type val = + (value_type((i % team_size) * 3) + off) * (value_type)team_size; expected_result += val; } - ASSERT_EQ(size_t(expected_result), - size_t(total)); // printf("team_broadcast with value -- - // expected_result=%d, - // total=%d\n",expected_result, total); + compare_test(expected_result, + total); // printf("team_broadcast with value -- + // expected_result=%d, + // total=%d\n",expected_result, total); - // team_broadcast with funtion object + // team_broadcast with function object total = 0; Kokkos::parallel_reduce(policy_type_f(league_size, team_size), functor, @@ -1226,13 +1377,14 @@ struct TestTeamBroadcast { expected_result = 0; for (unsigned int i = 0; i < league_size; i++) { - value_type val = ((i % team_size) * 3 + 1) * 2 * team_size; + value_type val = ((value_type)((i % team_size) * 3) + off) * + (value_type)(2 * team_size); expected_result += val; } - ASSERT_EQ(size_t(expected_result), - size_t(total)); // printf("team_broadcast with funtion object -- - // expected_result=%d, - // total=%d\n",expected_result, total); + compare_test(expected_result, + total); // printf("team_broadcast with function object -- + // expected_result=%d, + // total=%d\n",expected_result, total); } }; diff --git a/core/unit_test/TestTeamTeamSize.hpp b/core/unit_test/TestTeamTeamSize.hpp index 4a9b7c41a4a..0703f90cf8a 100644 --- a/core/unit_test/TestTeamTeamSize.hpp +++ b/core/unit_test/TestTeamTeamSize.hpp @@ -78,13 +78,13 @@ template struct FunctorFor { double static_array[S]; KOKKOS_INLINE_FUNCTION - void operator()(const typename PolicyType::member_type& team) const {} + void operator()(const typename PolicyType::member_type& /*team*/) const {} }; template struct FunctorReduce { double static_array[S]; KOKKOS_INLINE_FUNCTION - void operator()(const typename PolicyType::member_type& team, + void operator()(const typename PolicyType::member_type& /*team*/, MyArray& lval) const { for (int j = 0; j < N; j++) lval.values[j] += 1 + lval.values[0]; } @@ -191,7 +191,7 @@ struct PrintFunctor2 { }; TEST(TEST_CATEGORY, team_policy_max_scalar_without_plus_equal_k) { - using ExecSpace = Kokkos::DefaultExecutionSpace; + using ExecSpace = TEST_EXECSPACE; using ReducerType = Kokkos::MinMax; using ReducerValueType = typename ReducerType::value_type; using DynamicScheduleType = Kokkos::Schedule; diff --git a/core/unit_test/TestTeamVector.hpp b/core/unit_test/TestTeamVector.hpp index 790d814f255..c313988efc2 100644 --- a/core/unit_test/TestTeamVector.hpp +++ b/core/unit_test/TestTeamVector.hpp @@ -605,7 +605,7 @@ struct functor_vec_single { KOKKOS_INLINE_FUNCTION void operator()(typename policy_type::member_type team) const { - // Warning: this test case intentionally violates permissable semantics. + // Warning: this test case intentionally violates permissible semantics. // It is not valid to get references to members of the enclosing region // inside a parallel_for and write to it. Scalar value = 0; @@ -622,7 +622,7 @@ struct functor_vec_single { Scalar value2 = 0; Kokkos::parallel_reduce( Kokkos::ThreadVectorRange(team, nStart, nEnd), - [&](int i, Scalar &val) { val += value; }, value2); + [&](int /*i*/, Scalar &val) { val += value; }, value2); if (value2 != (value * (nEnd - nStart))) { printf("FAILED vector_single broadcast %i %i %f %f\n", team.league_rank(), @@ -895,8 +895,7 @@ namespace Test { // Computes y^T*A*x // ( modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar ) -#if (!defined(KOKKOS_ENABLE_CUDA)) || \ - (defined(KOKKOS_ENABLE_CUDA_LAMBDA) && (8000 <= CUDA_VERSION)) +#if (!defined(KOKKOS_ENABLE_CUDA)) || defined(KOKKOS_ENABLE_CUDA_LAMBDA) template class TestTripleNestedReduce { public: diff --git a/core/unit_test/TestTeamVectorRange.hpp b/core/unit_test/TestTeamVectorRange.hpp index 71351c19815..cc837851859 100644 --- a/core/unit_test/TestTeamVectorRange.hpp +++ b/core/unit_test/TestTeamVectorRange.hpp @@ -233,7 +233,7 @@ struct functor_teamvector_for { typedef typename ExecutionSpace::scratch_memory_space shmem_space; typedef Kokkos::View shared_int; - unsigned team_shmem_size(int team_size) const { + unsigned team_shmem_size(int /*team_size*/) const { return shared_int::shmem_size(131); } diff --git a/core/unit_test/TestTemplateMetaFunctions.hpp b/core/unit_test/TestTemplateMetaFunctions.hpp index 1e25d46dfeb..b9c16f506c2 100644 --- a/core/unit_test/TestTemplateMetaFunctions.hpp +++ b/core/unit_test/TestTemplateMetaFunctions.hpp @@ -58,7 +58,7 @@ struct SumPlain { SumPlain(type view_) : view(view_) {} KOKKOS_INLINE_FUNCTION - void operator()(int i, Scalar& val) { val += Scalar(); } + void operator()(int /*i*/, Scalar& val) { val += Scalar(); } }; template @@ -80,7 +80,7 @@ struct SumInitJoinFinalValueType { } KOKKOS_INLINE_FUNCTION - void operator()(int i, value_type& val) const { val += value_type(); } + void operator()(int /*i*/, value_type& val) const { val += value_type(); } }; template @@ -102,7 +102,7 @@ struct SumInitJoinFinalValueType2 { } KOKKOS_INLINE_FUNCTION - void operator()(int i, value_type& val) const { val += value_type(); } + void operator()(int /*i*/, value_type& val) const { val += value_type(); } }; template @@ -157,7 +157,7 @@ struct SumWrongInitJoinFinalValueType { } KOKKOS_INLINE_FUNCTION - void operator()(int i, value_type& val) const { val += value_type(); } + void operator()(int /*i*/, value_type& val) const { val += value_type(); } }; template diff --git a/core/unit_test/TestViewAPI.hpp b/core/unit_test/TestViewAPI.hpp index 36d158d643b..b9847773b60 100644 --- a/core/unit_test/TestViewAPI.hpp +++ b/core/unit_test/TestViewAPI.hpp @@ -841,6 +841,22 @@ struct TestViewMirror { ASSERT_EQ(a_org(5), a_h3(5)); } + template + static typename View::const_type view_const_cast(View const &v) { + return v; + } + + static void test_mirror_copy_const_data_type() { + using ExecutionSpace = typename DeviceType::execution_space; + int const N = 100; + Kokkos::View v("v", N); + Kokkos::deep_copy(v, 255); + auto v_m1 = Kokkos::create_mirror_view_and_copy( + Kokkos::DefaultHostExecutionSpace(), view_const_cast(v)); + auto v_m2 = Kokkos::create_mirror_view_and_copy(ExecutionSpace(), + view_const_cast(v)); + } + template struct CopyUnInit { typedef typename Kokkos::Impl::MirrorViewType< @@ -896,6 +912,7 @@ struct TestViewMirror { test_mirror_view >(); test_mirror_copy >(); test_mirror_copy >(); + test_mirror_copy_const_data_type(); test_mirror_no_initialize >(); test_mirror_no_initialize >(); } @@ -920,8 +937,6 @@ class TestViewAPI { dView4_unmanaged; typedef typename dView0::host_mirror_space host; - TestViewAPI() {} - static void run_test_view_operator_a() { { TestViewOperator f; @@ -1034,12 +1049,12 @@ class TestViewAPI { dView4 dx, dy, dz; hView4 hx, hy, hz; - ASSERT_TRUE(dx.data() == 0); - ASSERT_TRUE(dy.data() == 0); - ASSERT_TRUE(dz.data() == 0); - ASSERT_TRUE(hx.data() == 0); - ASSERT_TRUE(hy.data() == 0); - ASSERT_TRUE(hz.data() == 0); + ASSERT_TRUE(dx.data() == nullptr); + ASSERT_TRUE(dy.data() == nullptr); + ASSERT_TRUE(dz.data() == nullptr); + ASSERT_TRUE(hx.data() == nullptr); + ASSERT_TRUE(hy.data() == nullptr); + ASSERT_TRUE(hz.data() == nullptr); ASSERT_EQ(dx.extent(0), 0u); ASSERT_EQ(dy.extent(0), 0u); ASSERT_EQ(dz.extent(0), 0u); @@ -1096,11 +1111,11 @@ class TestViewAPI { ASSERT_EQ(dx.use_count(), size_t(2)); - ASSERT_FALSE(dx.data() == 0); - ASSERT_FALSE(const_dx.data() == 0); - ASSERT_FALSE(unmanaged_dx.data() == 0); - ASSERT_FALSE(unmanaged_from_ptr_dx.data() == 0); - ASSERT_FALSE(dy.data() == 0); + ASSERT_FALSE(dx.data() == nullptr); + ASSERT_FALSE(const_dx.data() == nullptr); + ASSERT_FALSE(unmanaged_dx.data() == nullptr); + ASSERT_FALSE(unmanaged_from_ptr_dx.data() == nullptr); + ASSERT_FALSE(dy.data() == nullptr); ASSERT_NE(dx, dy); ASSERT_EQ(dx.extent(0), unsigned(N0)); @@ -1139,6 +1154,7 @@ class TestViewAPI { Kokkos::deep_copy(typename hView4::execution_space(), dx, hx); Kokkos::deep_copy(typename hView4::execution_space(), dy, dx); Kokkos::deep_copy(typename hView4::execution_space(), hy, dy); + typename dView4::execution_space().fence(); for (size_t ip = 0; ip < N0; ++ip) for (size_t i1 = 0; i1 < N1; ++i1) @@ -1149,6 +1165,7 @@ class TestViewAPI { Kokkos::deep_copy(typename hView4::execution_space(), dx, T(0)); Kokkos::deep_copy(typename hView4::execution_space(), hx, dx); + typename dView4::execution_space().fence(); for (size_t ip = 0; ip < N0; ++ip) for (size_t i1 = 0; i1 < N1; ++i1) @@ -1172,6 +1189,7 @@ class TestViewAPI { Kokkos::deep_copy(typename dView4::execution_space(), dx, hx); Kokkos::deep_copy(typename dView4::execution_space(), dy, dx); Kokkos::deep_copy(typename dView4::execution_space(), hy, dy); + typename dView4::execution_space().fence(); for (size_t ip = 0; ip < N0; ++ip) for (size_t i1 = 0; i1 < N1; ++i1) @@ -1182,6 +1200,7 @@ class TestViewAPI { Kokkos::deep_copy(typename dView4::execution_space(), dx, T(0)); Kokkos::deep_copy(typename dView4::execution_space(), hx, dx); + typename dView4::execution_space().fence(); for (size_t ip = 0; ip < N0; ++ip) for (size_t i1 = 0; i1 < N1; ++i1) @@ -1233,19 +1252,19 @@ class TestViewAPI { ASSERT_NE(dx, dz); dx = dView4(); - ASSERT_TRUE(dx.data() == 0); - ASSERT_FALSE(dy.data() == 0); - ASSERT_FALSE(dz.data() == 0); + ASSERT_TRUE(dx.data() == nullptr); + ASSERT_FALSE(dy.data() == nullptr); + ASSERT_FALSE(dz.data() == nullptr); dy = dView4(); - ASSERT_TRUE(dx.data() == 0); - ASSERT_TRUE(dy.data() == 0); - ASSERT_FALSE(dz.data() == 0); + ASSERT_TRUE(dx.data() == nullptr); + ASSERT_TRUE(dy.data() == nullptr); + ASSERT_FALSE(dz.data() == nullptr); dz = dView4(); - ASSERT_TRUE(dx.data() == 0); - ASSERT_TRUE(dy.data() == 0); - ASSERT_TRUE(dz.data() == 0); + ASSERT_TRUE(dx.data() == nullptr); + ASSERT_TRUE(dy.data() == nullptr); + ASSERT_TRUE(dz.data() == nullptr); } static void run_test_deep_copy_empty() { @@ -1450,6 +1469,11 @@ class TestViewAPI { } static void run_test_error() { +#ifdef KOKKOS_ENABLE_OPENMPTARGET + if (std::is_same::value) + return; +#endif auto alloc_size = std::numeric_limits::max() - 42; try { auto should_always_fail = dView1("hello_world_failure", alloc_size); diff --git a/core/unit_test/TestViewAPI_e.hpp b/core/unit_test/TestViewAPI_e.hpp index e433535236d..40ae0836307 100644 --- a/core/unit_test/TestViewAPI_e.hpp +++ b/core/unit_test/TestViewAPI_e.hpp @@ -204,13 +204,13 @@ TEST(TEST_CATEGORY, anonymous_space) { test_anonymous_space(); } template struct TestViewOverloadResolution { // Overload based on value_type and rank - static int foo(Kokkos::View a) { return 1; } - static int foo(Kokkos::View a) { return 2; } - static int foo(Kokkos::View a) { return 3; } + static int foo(Kokkos::View /*a*/) { return 1; } + static int foo(Kokkos::View /*a*/) { return 2; } + static int foo(Kokkos::View /*a*/) { return 3; } // Overload based on compile time dimensions - static int bar(Kokkos::View a) { return 4; } - static int bar(Kokkos::View a) { return 5; } + static int bar(Kokkos::View /*a*/) { return 4; } + static int bar(Kokkos::View /*a*/) { return 5; } static void test_function_overload() { Kokkos::View::test_function_overload(); } } // namespace Test + +#include diff --git a/core/unit_test/TestViewCopy.hpp b/core/unit_test/TestViewCopy.hpp index 27018e7f7ba..6db2d3a4f6d 100644 --- a/core/unit_test/TestViewCopy.hpp +++ b/core/unit_test/TestViewCopy.hpp @@ -52,124 +52,446 @@ namespace Test { namespace { -template -struct TestViewCopy { - using InExecSpace = ExecSpace; - - static void test_view_copy(const int dim0, const int dim1, const int dim2) { -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_ROCM) - // ExecSpace = CudaUVM, CudaHostPinned - // This test will fail at runtime with an illegal memory access if something - // goes wrong Test 1: deep_copy from host_mirror_space to ExecSpace and - // ExecSpace back to host_mirror_space - { - typedef Kokkos::View Rank4ViewType; - Rank4ViewType view_4; - view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2); - - typedef typename Kokkos::Impl::is_space< - InExecSpace>::host_mirror_space::execution_space host_space_type; - Kokkos::View srcView( - "srcView", dim2, dim2); - - // Strided dst view - auto dstView = - Kokkos::subview(view_4, 0, 0, Kokkos::ALL(), Kokkos::ALL()); - - // host_mirror_space to ExecSpace - Kokkos::deep_copy(dstView, srcView); - Kokkos::fence(); - - // ExecSpace to host_mirror_space - Kokkos::deep_copy(srcView, dstView); - Kokkos::fence(); +template +struct CheckResult { + using value_type = typename ViewType::non_const_value_type; + ViewType v; + value_type value; + CheckResult(ViewType v_, value_type value_) : v(v_), value(value_){}; + KOKKOS_FUNCTION + void operator()(const int i, int& lsum) const { + for (int j = 0; j < static_cast(v.extent(1)); j++) { + if (v.access(i, j) != value) lsum++; } + } +}; - // Test 2: deep_copy from Cuda to ExecSpace and ExecSpace back to Cuda - { - typedef Kokkos::View Rank4ViewType; - Rank4ViewType view_4; - view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2); - -#if defined(KOKKOS_ENABLE_CUDA) - typedef typename std::conditional< - Kokkos::Impl::MemorySpaceAccess< - Kokkos::CudaSpace, - typename InExecSpace::memory_space>::accessible, - Kokkos::CudaSpace, InExecSpace>::type space_type; -#endif -#if defined(KOKKOS_ENABLE_ROCM) - typedef typename std::conditional< - Kokkos::Impl::MemorySpaceAccess< - Kokkos::ROCmSpace, - typename InExecSpace::memory_space>::accessible, - Kokkos::ROCmSpace, InExecSpace>::type space_type; -#endif - Kokkos::View srcView( - "srcView", dim2, dim2); - - // Strided dst view - auto dstView = - Kokkos::subview(view_4, 0, 0, Kokkos::ALL(), Kokkos::ALL()); - - // Cuda to ExecSpace - Kokkos::deep_copy(dstView, srcView); - Kokkos::fence(); - - // ExecSpace to Cuda - Kokkos::deep_copy(srcView, dstView); - Kokkos::fence(); - } +template +bool run_check(ViewType v, typename ViewType::value_type value) { + using exec_space = typename ViewType::memory_space::execution_space; + int errors = 0; + Kokkos::fence(); + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, v.extent(0)), + CheckResult(v, value), errors); + return errors == 0; +} - // Test 3: deep_copy from host_space to ExecSpace and ExecSpace back to - // host_space - { - typedef Kokkos::View Rank4ViewType; - Rank4ViewType view_4; - view_4 = Rank4ViewType("view_4", dim0, dim1, dim2, dim2); +} // namespace - typedef Kokkos::HostSpace host_space_type; - Kokkos::View srcView( - "srcView", dim2, dim2); +TEST(TEST_CATEGORY, view_copy_tests) { + int N = 10000; + int M = 10; - // Strided dst view - auto dstView = - Kokkos::subview(view_4, 0, 0, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::View defaulted; + Kokkos::View a("A", N, M); + Kokkos::View b("B", N, M); + auto h_a = Kokkos::create_mirror(a); + auto h_b = Kokkos::create_mirror(b); + auto m_a = Kokkos::create_mirror_view(a); + auto s_a = Kokkos::subview(a, Kokkos::ALL, 1); + auto s_b = Kokkos::subview(b, Kokkos::ALL, 1); + auto hs_a = Kokkos::subview(h_a, Kokkos::ALL, 1); + auto hs_b = Kokkos::subview(h_b, Kokkos::ALL, 1); + auto dev = typename TEST_EXECSPACE::execution_space(); + auto host = Kokkos::DefaultHostExecutionSpace(); + + constexpr bool DevExecCanAccessHost = + Kokkos::Impl::SpaceAccessibility::accessible; + + constexpr bool HostExecCanAccessDev = Kokkos::Impl::SpaceAccessibility< + typename Kokkos::HostSpace::execution_space, + typename TEST_EXECSPACE::memory_space>::accessible; + + // Contiguous copies + { Kokkos::deep_copy(defaulted, defaulted); } + { + Kokkos::deep_copy(a, 1); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(a, a); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(m_a, a); + ASSERT_TRUE(run_check(m_a, 1)); + } + { + Kokkos::deep_copy(m_a, 2); + ASSERT_TRUE(run_check(m_a, 2)); + } + { + Kokkos::deep_copy(a, m_a); + ASSERT_TRUE(run_check(a, 2)); + } + { + Kokkos::deep_copy(b, 3); + ASSERT_TRUE(run_check(b, 3)); + } + { + Kokkos::deep_copy(h_a, 4); + ASSERT_TRUE(run_check(h_a, 4)); + } + { + Kokkos::deep_copy(a, b); + ASSERT_TRUE(run_check(a, 3)); + } + { + Kokkos::deep_copy(h_b, h_a); + ASSERT_TRUE(run_check(h_b, 4)); + } + { + Kokkos::deep_copy(h_a, a); + ASSERT_TRUE(run_check(h_a, 3)); + } + { + Kokkos::deep_copy(b, h_b); + ASSERT_TRUE(run_check(b, 4)); + } + // Non contiguous copies + { + Kokkos::deep_copy(s_a, 5); + ASSERT_TRUE(run_check(s_a, 5)); + } + { + Kokkos::deep_copy(hs_a, 6); + ASSERT_TRUE(run_check(hs_a, 6)); + } + { + Kokkos::deep_copy(s_b, s_a); + ASSERT_TRUE(run_check(s_b, 5)); + } + { + Kokkos::deep_copy(hs_b, hs_a); + ASSERT_TRUE(run_check(hs_b, 6)); + } + if (DevExecCanAccessHost || HostExecCanAccessDev) { + { + Kokkos::deep_copy(hs_b, s_b); + ASSERT_TRUE(run_check(hs_b, 5)); + } + { + Kokkos::deep_copy(s_a, hs_a); + ASSERT_TRUE(run_check(s_a, 6)); + } + } - // host_space to ExecSpace - Kokkos::deep_copy(dstView, srcView); - Kokkos::fence(); + // Contiguous copies + { Kokkos::deep_copy(dev, defaulted, defaulted); } + { + Kokkos::deep_copy(dev, a, 1); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(dev, a, a); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(dev, m_a, a); + ASSERT_TRUE(run_check(m_a, 1)); + } + { + Kokkos::deep_copy(dev, m_a, 2); + ASSERT_TRUE(run_check(m_a, 2)); + } + { + Kokkos::deep_copy(dev, a, m_a); + ASSERT_TRUE(run_check(a, 2)); + } + { + Kokkos::deep_copy(dev, b, 3); + ASSERT_TRUE(run_check(b, 3)); + } + { + Kokkos::deep_copy(dev, h_a, 4); + ASSERT_TRUE(run_check(h_a, 4)); + } + { + Kokkos::deep_copy(dev, a, b); + ASSERT_TRUE(run_check(a, 3)); + } + { + Kokkos::deep_copy(dev, h_b, h_a); + ASSERT_TRUE(run_check(h_b, 4)); + } + { + Kokkos::deep_copy(dev, h_a, a); + ASSERT_TRUE(run_check(h_a, 3)); + } + { + Kokkos::deep_copy(dev, b, h_b); + ASSERT_TRUE(run_check(b, 4)); + } + // Non contiguous copies + { + Kokkos::deep_copy(dev, s_a, 5); + ASSERT_TRUE(run_check(s_a, 5)); + } + { + Kokkos::deep_copy(dev, hs_a, 6); + ASSERT_TRUE(run_check(hs_a, 6)); + } + { + Kokkos::deep_copy(dev, s_b, s_a); + ASSERT_TRUE(run_check(s_b, 5)); + } + { + Kokkos::deep_copy(dev, hs_b, hs_a); + ASSERT_TRUE(run_check(hs_b, 6)); + } + if (DevExecCanAccessHost || HostExecCanAccessDev) { + { + Kokkos::deep_copy(dev, hs_b, s_b); + ASSERT_TRUE(run_check(hs_b, 5)); + } + { + Kokkos::deep_copy(dev, s_a, hs_a); + ASSERT_TRUE(run_check(s_a, 6)); + } + } - // ExecSpace to host_space - Kokkos::deep_copy(srcView, dstView); - Kokkos::fence(); + // Contiguous copies + { Kokkos::deep_copy(host, defaulted, defaulted); } + { + Kokkos::deep_copy(host, a, 1); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(host, a, a); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(host, m_a, a); + ASSERT_TRUE(run_check(m_a, 1)); + } + { + Kokkos::deep_copy(host, m_a, 2); + ASSERT_TRUE(run_check(m_a, 2)); + } + { + Kokkos::deep_copy(host, a, m_a); + ASSERT_TRUE(run_check(a, 2)); + } + { + Kokkos::deep_copy(host, b, 3); + ASSERT_TRUE(run_check(b, 3)); + } + { + Kokkos::deep_copy(host, h_a, 4); + ASSERT_TRUE(run_check(h_a, 4)); + } + { + Kokkos::deep_copy(host, a, b); + ASSERT_TRUE(run_check(a, 3)); + } + { + Kokkos::deep_copy(host, h_b, h_a); + ASSERT_TRUE(run_check(h_b, 4)); + } + { + Kokkos::deep_copy(host, h_a, a); + ASSERT_TRUE(run_check(h_a, 3)); + } + { + Kokkos::deep_copy(host, b, h_b); + ASSERT_TRUE(run_check(b, 4)); + } + // Non contiguous copies + { + Kokkos::deep_copy(host, s_a, 5); + ASSERT_TRUE(run_check(s_a, 5)); + } + { + Kokkos::deep_copy(host, hs_a, 6); + ASSERT_TRUE(run_check(hs_a, 6)); + } + { + Kokkos::deep_copy(host, s_b, s_a); + ASSERT_TRUE(run_check(s_b, 5)); + } + { + Kokkos::deep_copy(host, hs_b, hs_a); + ASSERT_TRUE(run_check(hs_b, 6)); + } + if (DevExecCanAccessHost || HostExecCanAccessDev) { + { + Kokkos::deep_copy(host, hs_b, s_b); + ASSERT_TRUE(run_check(hs_b, 5)); } -#endif - } // end test_view_copy + { + Kokkos::deep_copy(host, s_a, hs_a); + ASSERT_TRUE(run_check(s_a, 6)); + } + } +} -}; // end struct +TEST(TEST_CATEGORY, view_copy_tests_rank_0) { + Kokkos::View defaulted; + Kokkos::View a("A"); + Kokkos::View b("B"); + auto h_a = Kokkos::create_mirror(a); + auto h_b = Kokkos::create_mirror(b); + auto m_a = Kokkos::create_mirror_view(a); + auto dev = typename TEST_EXECSPACE::execution_space(); + auto host = Kokkos::DefaultHostExecutionSpace(); -} // namespace + // No execution space + { Kokkos::deep_copy(defaulted, defaulted); } + { + Kokkos::deep_copy(a, 1); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(a, a); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(m_a, a); + ASSERT_TRUE(run_check(m_a, 1)); + } + { + Kokkos::deep_copy(m_a, 2); + ASSERT_TRUE(run_check(m_a, 2)); + } + { + Kokkos::deep_copy(a, m_a); + ASSERT_TRUE(run_check(a, 2)); + } + { + Kokkos::deep_copy(b, 3); + ASSERT_TRUE(run_check(b, 3)); + } + { + Kokkos::deep_copy(h_a, 4); + ASSERT_TRUE(run_check(h_a, 4)); + } + { + Kokkos::deep_copy(a, b); + ASSERT_TRUE(run_check(a, 3)); + } + { + Kokkos::deep_copy(h_b, h_a); + ASSERT_TRUE(run_check(h_b, 4)); + } + { + Kokkos::deep_copy(h_a, a); + ASSERT_TRUE(run_check(h_a, 3)); + } + { + Kokkos::deep_copy(b, h_b); + ASSERT_TRUE(run_check(b, 4)); + } -TEST(TEST_CATEGORY, view_copy_tests) { - // Only include this file to be compiled with CudaUVM and CudaHostPinned - TestViewCopy::test_view_copy(4, 2, 3); - TestViewCopy::test_view_copy(4, 2, 0); + // Device + { Kokkos::deep_copy(dev, defaulted, defaulted); } + { + Kokkos::deep_copy(dev, a, 1); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(dev, a, a); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(dev, m_a, a); + ASSERT_TRUE(run_check(m_a, 1)); + } + { + Kokkos::deep_copy(dev, m_a, 2); + ASSERT_TRUE(run_check(m_a, 2)); + } + { + Kokkos::deep_copy(dev, a, m_a); + ASSERT_TRUE(run_check(a, 2)); + } + { + Kokkos::deep_copy(dev, b, 3); + ASSERT_TRUE(run_check(b, 3)); + } + { + Kokkos::deep_copy(dev, h_a, 4); + ASSERT_TRUE(run_check(h_a, 4)); + } + { + Kokkos::deep_copy(dev, a, b); + ASSERT_TRUE(run_check(a, 3)); + } + { + Kokkos::deep_copy(dev, h_b, h_a); + ASSERT_TRUE(run_check(h_b, 4)); + } + { + Kokkos::deep_copy(dev, h_a, a); + ASSERT_TRUE(run_check(h_a, 3)); + } + { + Kokkos::deep_copy(dev, b, h_b); + ASSERT_TRUE(run_check(b, 4)); + } + + // Host + { Kokkos::deep_copy(host, defaulted, defaulted); } + { + Kokkos::deep_copy(host, a, 1); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(host, a, a); + ASSERT_TRUE(run_check(a, 1)); + } + { + Kokkos::deep_copy(host, m_a, a); + ASSERT_TRUE(run_check(m_a, 1)); + } + { + Kokkos::deep_copy(host, m_a, 2); + ASSERT_TRUE(run_check(m_a, 2)); + } + { + Kokkos::deep_copy(host, a, m_a); + ASSERT_TRUE(run_check(a, 2)); + } + { + Kokkos::deep_copy(host, b, 3); + ASSERT_TRUE(run_check(b, 3)); + } + { + Kokkos::deep_copy(host, h_a, 4); + ASSERT_TRUE(run_check(h_a, 4)); + } + { + Kokkos::deep_copy(host, a, b); + ASSERT_TRUE(run_check(a, 3)); + } + { + Kokkos::deep_copy(host, h_b, h_a); + ASSERT_TRUE(run_check(h_b, 4)); + } + { + Kokkos::deep_copy(host, h_a, a); + ASSERT_TRUE(run_check(h_a, 3)); + } + { + Kokkos::deep_copy(host, b, h_b); + ASSERT_TRUE(run_check(b, 4)); + } } TEST(TEST_CATEGORY, view_copy_degenerated) { - // Only include this file to be compiled with CudaUVM and CudaHostPinned - Kokkos::View> v_um_def_1; - Kokkos::View> v_um_1( - reinterpret_cast(-1), 0); - Kokkos::View v_m_def_1; - Kokkos::View v_m_1("v_m_1", 0); - - Kokkos::View> v_um_def_2; - Kokkos::View> v_um_2( - reinterpret_cast(-1), 0); - Kokkos::View v_m_def_2; - Kokkos::View v_m_2("v_m_2", 0); + Kokkos::View> + v_um_def_1; + Kokkos::View> + v_um_1(reinterpret_cast(-1), 0); + Kokkos::View v_m_def_1; + Kokkos::View v_m_1("v_m_1", 0); + + Kokkos::View> + v_um_def_2; + Kokkos::View> + v_um_2(reinterpret_cast(-1), 0); + Kokkos::View v_m_def_2; + Kokkos::View v_m_2("v_m_2", 0); Kokkos::deep_copy(v_um_def_1, v_um_def_2); Kokkos::deep_copy(v_um_def_1, v_um_2); diff --git a/core/unit_test/TestViewCtorPropEmbeddedDim.hpp b/core/unit_test/TestViewCtorPropEmbeddedDim.hpp index 920278f4d0f..30701b3a4e2 100644 --- a/core/unit_test/TestViewCtorPropEmbeddedDim.hpp +++ b/core/unit_test/TestViewCtorPropEmbeddedDim.hpp @@ -60,7 +60,7 @@ struct TestViewCtorProp_EmbeddedDim { using ViewIntType = typename Kokkos::View; using ViewDoubleType = typename Kokkos::View; - // Cuda 7.0 has issues with using a lamda in parallel_for to initialize the + // Cuda 7.0 has issues with using a lambda in parallel_for to initialize the // view - replace with this functor template struct Functor { diff --git a/core/unit_test/TestViewIsAssignable.hpp b/core/unit_test/TestViewIsAssignable.hpp new file mode 100644 index 00000000000..fcf9f75f37a --- /dev/null +++ b/core/unit_test/TestViewIsAssignable.hpp @@ -0,0 +1,145 @@ +#include + +namespace Test { +namespace Impl { +template +struct TestAssignability { + using mapping_type = + Kokkos::Impl::ViewMapping; + + template + static void try_assign( + ViewTypeDst& dst, ViewTypeSrc& src, + typename std::enable_if::type* = nullptr) { + dst = src; + } + + template + static void try_assign( + ViewTypeDst&, ViewTypeSrc&, + typename std::enable_if::type* = nullptr) { + Kokkos::Impl::throw_runtime_exception( + "TestAssignability::try_assign: Unexpected call path"); + } + + template + static void test(bool always, bool sometimes, Dimensions... dims) { + ViewTypeDst dst; + ViewTypeSrc src("SRC", dims...); + + bool is_always_assignable = + Kokkos::is_always_assignable::value; + bool is_assignable = Kokkos::is_assignable(dst, src); + + // Print out if there is an error with typeid so you can just filter the + // output with c++filt -t to see which assignment causes the error. + if (is_always_assignable != always || is_assignable != sometimes) + printf( + "is_always_assignable: %i (%i), is_assignable: %i (%i) [ %s ] to [ " + "%s ]\n", + is_always_assignable ? 1 : 0, always ? 1 : 0, is_assignable ? 1 : 0, + sometimes ? 1 : 0, typeid(ViewTypeSrc).name(), + typeid(ViewTypeDst).name()); + if (sometimes) { + ASSERT_NO_THROW(try_assign(dst, src)); + } + ASSERT_EQ(always, is_always_assignable); + ASSERT_EQ(sometimes, is_assignable); + } +}; + +} // namespace Impl + +TEST(TEST_CATEGORY, view_is_assignable) { + using namespace Kokkos; + using h_exec = typename DefaultHostExecutionSpace::memory_space; + using d_exec = typename TEST_EXECSPACE::memory_space; + using left = LayoutLeft; + using right = LayoutRight; + using stride = LayoutStride; + // Static/Dynamic Extents + Impl::TestAssignability, + View>::test(true, true, 10); + Impl::TestAssignability, + View>::test(false, true, 10); + Impl::TestAssignability, + View>::test(false, false, 10); + Impl::TestAssignability, + View>::test(true, true); + Impl::TestAssignability, + View>::test(true, true); + Impl::TestAssignability, + View>::test(false, false); + Impl::TestAssignability, + View>::test(true, true, 10, 10); + Impl::TestAssignability, + View>::test(false, true, 10, 10); + Impl::TestAssignability, + View>::test(false, false, 10, + 10); + Impl::TestAssignability, + View>::test(true, true, 10); + Impl::TestAssignability, + View>::test(true, true, 10); + Impl::TestAssignability, + View>::test(false, false, + 10); + + // Mismatch value_type + Impl::TestAssignability, + View>::test(false, false, 10); + + // Layout assignment + Impl::TestAssignability, + View>::test(true, true, 10); + + // This could be made possible (due to the degenerate nature of the views) but + // we do not allow this yet + // TestAssignability,View>::test(false,true,10,1); + Impl::TestAssignability, + View>::test(false, false, 10, + 2); + Impl::TestAssignability, + View>::test(true, true, 10, 2); + Impl::TestAssignability, + View>::test(true, true, 10, 2); + + // Space Assignment + bool expected = Kokkos::Impl::MemorySpaceAccess::assignable; + Impl::TestAssignability, + View>::test(expected, expected, + 10); + expected = Kokkos::Impl::MemorySpaceAccess::assignable; + Impl::TestAssignability, + View>::test(expected, expected, + 10); + + // reference type and const-qualified types + using SomeViewType = View; +#if defined(KOKKOS_ENABLE_CXX17) + static_assert(is_always_assignable_v); + static_assert(is_always_assignable_v); + static_assert(is_always_assignable_v); + static_assert(is_always_assignable_v); + static_assert(is_always_assignable_v); + static_assert(is_always_assignable_v); + static_assert(is_always_assignable_v); + static_assert(is_always_assignable_v); +#else + static_assert(is_always_assignable::value, ""); + static_assert(is_always_assignable::value, ""); + static_assert(is_always_assignable::value, + ""); + static_assert(is_always_assignable::value, + ""); + static_assert(is_always_assignable::value, ""); + static_assert(is_always_assignable::value, ""); + static_assert(is_always_assignable::value, + ""); + static_assert(is_always_assignable::value, + ""); +#endif +} +} // namespace Test diff --git a/core/unit_test/TestViewLayoutStrideAssignment.hpp b/core/unit_test/TestViewLayoutStrideAssignment.hpp index 5e373dbbc91..583d135f351 100644 --- a/core/unit_test/TestViewLayoutStrideAssignment.hpp +++ b/core/unit_test/TestViewLayoutStrideAssignment.hpp @@ -56,7 +56,7 @@ namespace Test { TEST(TEST_CATEGORY, view_layoutstride_left_to_layoutleft_assignment) { typedef TEST_EXECSPACE exec_space; - auto t = time(0); + auto t = time(nullptr); srand(t); // Use current time as seed for random generator printf("view_layoutstride_left_to_layoutleft_assignment: srand(%lu)\n", size_t(t)); @@ -338,7 +338,7 @@ TEST(TEST_CATEGORY, view_layoutstride_left_to_layoutleft_assignment) { TEST(TEST_CATEGORY, view_layoutstride_right_to_layoutright_assignment) { typedef TEST_EXECSPACE exec_space; - auto t = time(0); + auto t = time(nullptr); srand(t); // Use current time as seed for random generator printf("view_layoutstride_right_to_layoutright_assignment: srand(%lu)\n", size_t(t)); @@ -620,7 +620,7 @@ TEST(TEST_CATEGORY, view_layoutstride_right_to_layoutright_assignment) { TEST(TEST_CATEGORY_DEATH, view_layoutstride_right_to_layoutleft_assignment) { typedef TEST_EXECSPACE exec_space; - auto t = time(0); + auto t = time(nullptr); srand(t); // Use current time as seed for random generator printf("view_layoutstride_right_to_layoutleft_assignment: srand(%lu)\n", size_t(t)); @@ -771,7 +771,7 @@ TEST(TEST_CATEGORY_DEATH, view_layoutstride_right_to_layoutleft_assignment) { TEST(TEST_CATEGORY_DEATH, view_layoutstride_left_to_layoutright_assignment) { typedef TEST_EXECSPACE exec_space; - auto t = time(0); + auto t = time(nullptr); srand(t); // Use current time as seed for random generator printf("view_layoutstride_left_to_layoutright_assignment: srand(%lu)\n", size_t(t)); diff --git a/core/unit_test/TestViewLayoutTiled.hpp b/core/unit_test/TestViewLayoutTiled.hpp index 2526a3e7e3c..75eef2d69e9 100644 --- a/core/unit_test/TestViewLayoutTiled.hpp +++ b/core/unit_test/TestViewLayoutTiled.hpp @@ -112,9 +112,10 @@ struct TestViewLayoutTiled { Kokkos::Iterate::Right, Kokkos::Iterate::Right, T0, T1, T2, T3> LayoutRR_4D_2x4x4x2; +#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + static void test_view_layout_tiled_2d(const int, const int) { +#else static void test_view_layout_tiled_2d(const int N0, const int N1) { -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) const int FT = T0 * T1; const int NT0 = int(std::ceil(N0 / T0)); @@ -382,15 +383,14 @@ struct TestViewLayoutTiled { ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope -#endif #endif } // end test_view_layout_tiled_2d +#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + static void test_view_layout_tiled_3d(const int, const int, const int) { +#else static void test_view_layout_tiled_3d(const int N0, const int N1, const int N2) { -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) - const int FT = T0 * T1 * T2; const int NT0 = int(std::ceil(N0 / T0)); @@ -680,14 +680,15 @@ struct TestViewLayoutTiled { ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope -#endif #endif } // end test_view_layout_tiled_3d +#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + static void test_view_layout_tiled_4d(const int, const int, const int, + const int){ +#else static void test_view_layout_tiled_4d(const int N0, const int N1, const int N2, const int N3) { -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) const int FT = T0 * T1 * T2 * T3; const int NT0 = int(std::ceil(N0 / T0)); @@ -1027,7 +1028,6 @@ struct TestViewLayoutTiled { ASSERT_EQ(counter_subview, long(0)); ASSERT_EQ(counter_inc, long(0)); } // end scope -#endif #endif } // end test_view_layout_tiled_4d diff --git a/core/unit_test/TestViewMapping_a.hpp b/core/unit_test/TestViewMapping_a.hpp index 5e27cc5e51f..7bd6353c2b8 100644 --- a/core/unit_test/TestViewMapping_a.hpp +++ b/core/unit_test/TestViewMapping_a.hpp @@ -77,7 +77,9 @@ void test_view_mapping() { typedef Kokkos::Impl::ViewDimension<0, 0, 0, 0, 0, 0, 0, 0> dim_s0_s0_s0_s0_s0_s0_s0_s0; - // Fully static dimensions should not be larger than an int. +// Fully static dimensions should not be larger than an int. +#ifndef _WIN32 // For some reason on Windows the first test here fails with + // size being 7 bytes on windows??? ASSERT_LE(sizeof(dim_0), sizeof(int)); ASSERT_LE(sizeof(dim_s2), sizeof(int)); ASSERT_LE(sizeof(dim_s2_s3), sizeof(int)); @@ -98,7 +100,7 @@ void test_view_mapping() { ASSERT_EQ(sizeof(dim_s0_s0_s0_s0_s0_s0), 6 * sizeof(unsigned)); ASSERT_LE(sizeof(dim_s0_s0_s0_s0_s0_s0_s0), 8 * sizeof(unsigned)); ASSERT_EQ(sizeof(dim_s0_s0_s0_s0_s0_s0_s0_s0), 8 * sizeof(unsigned)); - +#endif static_assert(int(dim_0::rank) == int(0), ""); static_assert(int(dim_0::rank_dynamic) == int(0), ""); static_assert(int(dim_0::ArgN0) == 1, ""); @@ -900,7 +902,7 @@ void test_view_mapping() { ASSERT_TRUE(offset.span_is_contiguous()); Kokkos::Impl::ViewMapping v( - Kokkos::Impl::ViewCtorProp((int*)0), stride); + Kokkos::Impl::ViewCtorProp(nullptr), stride); } { @@ -1076,17 +1078,20 @@ void test_view_mapping() { typedef typename Kokkos::Impl::HostMirror::Space::execution_space host_exec_space; - Kokkos::parallel_for( - Kokkos::RangePolicy(0, 10), KOKKOS_LAMBDA(int) { - // 'a' is captured by copy, and the capture mechanism converts 'a' to + int errors = 0; + Kokkos::parallel_reduce( + Kokkos::RangePolicy(0, 10), + KOKKOS_LAMBDA(int, int& e) { // an unmanaged copy. When the parallel dispatch accepts a move for // the lambda, this count should become 1. - ASSERT_EQ(a.use_count(), 2); + if (a.use_count() != 2) ++e; V x = a; - ASSERT_EQ(a.use_count(), 2); - ASSERT_EQ(x.use_count(), 2); - }); + if (a.use_count() != 2) ++e; + if (x.use_count() != 2) ++e; + }, + errors); + ASSERT_EQ(errors, 0); #endif // #if !defined( KOKKOS_ENABLE_CUDA_LAMBDA ) } } @@ -1106,7 +1111,7 @@ struct TestViewMapOperator { #endif KOKKOS_INLINE_FUNCTION - void test_left(size_t i0, long& error_count) const { + void test_left(size_t i0, int64_t& error_count) const { #ifdef KOKKOS_ENABLE_DEPPRECATED_CODE typename ViewType::value_type* const base_ptr = &v(0, 0, 0, 0, 0, 0, 0, 0); #else @@ -1121,7 +1126,7 @@ struct TestViewMapOperator { const size_t n6 = v.extent(6); const size_t n7 = v.extent(7); - long offset = 0; + int64_t offset = 0; for (size_t i7 = 0; i7 < n7; ++i7) for (size_t i6 = 0; i6 < n6; ++i6) @@ -1131,9 +1136,10 @@ struct TestViewMapOperator { for (size_t i2 = 0; i2 < n2; ++i2) for (size_t i1 = 0; i1 < n1; ++i1) { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const long d = &v(i0, i1, i2, i3, i4, i5, i6, i7) - base_ptr; + const int64_t d = + &v(i0, i1, i2, i3, i4, i5, i6, i7) - base_ptr; #else - const long d = + const int64_t d = &v.access(i0, i1, i2, i3, i4, i5, i6, i7) - base_ptr; #endif if (d < offset) ++error_count; @@ -1144,7 +1150,7 @@ struct TestViewMapOperator { } KOKKOS_INLINE_FUNCTION - void test_right(size_t i0, long& error_count) const { + void test_right(size_t i0, int64_t& error_count) const { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE typename ViewType::value_type* const base_ptr = &v(0, 0, 0, 0, 0, 0, 0, 0); #else @@ -1159,7 +1165,7 @@ struct TestViewMapOperator { const size_t n6 = v.extent(6); const size_t n7 = v.extent(7); - long offset = 0; + int64_t offset = 0; for (size_t i1 = 0; i1 < n1; ++i1) for (size_t i2 = 0; i2 < n2; ++i2) @@ -1169,9 +1175,10 @@ struct TestViewMapOperator { for (size_t i6 = 0; i6 < n6; ++i6) for (size_t i7 = 0; i7 < n7; ++i7) { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE - const long d = &v(i0, i1, i2, i3, i4, i5, i6, i7) - base_ptr; + const int64_t d = + &v(i0, i1, i2, i3, i4, i5, i6, i7) - base_ptr; #else - const long d = + const int64_t d = &v.access(i0, i1, i2, i3, i4, i5, i6, i7) - base_ptr; #endif if (d < offset) ++error_count; @@ -1182,7 +1189,7 @@ struct TestViewMapOperator { } KOKKOS_INLINE_FUNCTION - void operator()(size_t i, long& error_count) const { + void operator()(size_t i, int64_t& error_count) const { if (std::is_same::value) { test_left(i, error_count); @@ -1245,7 +1252,7 @@ struct TestViewMapOperator { v.extent(4) * v.extent(5) * v.extent(6) * v.extent(7), v.span()); - long error_count; + int64_t error_count; Kokkos::RangePolicy range(0, v.extent(0)); Kokkos::parallel_reduce(range, *this, error_count); diff --git a/core/unit_test/TestView_64bit.hpp b/core/unit_test/TestView_64bit.hpp index 0b7b9d425aa..7dc47ccb0f2 100644 --- a/core/unit_test/TestView_64bit.hpp +++ b/core/unit_test/TestView_64bit.hpp @@ -55,7 +55,7 @@ void test_64bit() { Kokkos::parallel_reduce( Kokkos::RangePolicy>(0, N), - KOKKOS_LAMBDA(const int64_t& i, int64_t& lsum) { lsum += 1; }, sum); + KOKKOS_LAMBDA(const int64_t& /*i*/, int64_t& lsum) { lsum += 1; }, sum); ASSERT_EQ(N, sum); } { diff --git a/core/unit_test/TestWorkGraph.hpp b/core/unit_test/TestWorkGraph.hpp index 3ccab7f0eb8..5483051645f 100644 --- a/core/unit_test/TestWorkGraph.hpp +++ b/core/unit_test/TestWorkGraph.hpp @@ -56,7 +56,7 @@ namespace { the N-th fibonacci number as follows: - Each "task" or "work item" computes the i-th fibonacci number - If a task as (i < 2), it will record the known answer ahead of time. - - If a taks has (i >= 2), it will "spawn" two more tasks to compute + - If a task has (i >= 2), it will "spawn" two more tasks to compute the (i - 1) and (i - 2) fibonacci numbers. We do NOT do any de-duplication of these tasks. De-duplication would result in only (N - 2) tasks which must be run in diff --git a/core/unit_test/UnitTestConfig.make b/core/unit_test/UnitTestConfig.make index 97f4af5a8b7..5c93bf69fbd 100644 --- a/core/unit_test/UnitTestConfig.make +++ b/core/unit_test/UnitTestConfig.make @@ -8,7 +8,7 @@ KOKKOS_ARCH_OPTIONS="None AMDAVX ARMv80 ARMv81 ARMv8-ThunderX \ Maxwell Maxwell50 Maxwell52 Maxwell53 Pascal60 Pascal61" #KOKKOS_ARCH_OPTIONS="AMDAVX" -KOKKOS_DEVICE_OPTIONS="Cuda ROCm OpenMP Pthread Serial Qthreads" +KOKKOS_DEVICE_OPTIONS="Cuda ROCm OpenMP Pthread Serial" #KOKKOS_DEVICE_OPTIONS="Cuda" # Configure paths to enable environment query in Makefile.kokkos to work diff --git a/core/unit_test/config/cmaketest/CMakeLists.txt b/core/unit_test/config/cmaketest/CMakeLists.txt index 72fef0a3e81..5d590173944 100644 --- a/core/unit_test/config/cmaketest/CMakeLists.txt +++ b/core/unit_test/config/cmaketest/CMakeLists.txt @@ -56,20 +56,20 @@ foreach(KOKKOS_HOST_ARCH ${KOKKOS_HOST_ARCH_LIST}) set(NEWCONFH ${PREFIX}${KOKKOS_CONFIG_HEADER}) file(RENAME ${KOKKOS_CMAKEFILE} ${NEWCMAKE}) file(RENAME ${KOKKOS_CONFIG_HEADER} ${NEWCONFH}) - - add_test(NAME ${NEWCMAKE}-test + + add_test(NAME ${NEWCMAKE}-test COMMAND ${KOKKOS_TESTDIR}/testmake.sh ${NEWCMAKE} ${KOKKOS_HOST_ARCH} ${KOKKOS_DEV} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) - set_tests_properties(${NEWCMAKE}-test + set_tests_properties(${NEWCMAKE}-test PROPERTIES PASS_REGULAR_EXPRESSION Passed TIMEOUT 15 ) - add_test(NAME ${NEWCONFH}-test - COMMAND ${KOKKOS_TESTDIR}/diffconfig.sh ${NEWCONFH} + add_test(NAME ${NEWCONFH}-test + COMMAND ${KOKKOS_TESTDIR}/diffconfig.sh ${NEWCONFH} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) - set_tests_properties(${NEWCONFH}-test + set_tests_properties(${NEWCONFH}-test PROPERTIES PASS_REGULAR_EXPRESSION Passed TIMEOUT 15 ) diff --git a/core/unit_test/config/results/AMDAVX_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/AMDAVX_Qthreads_KokkosCore_config.h deleted file mode 100644 index fb5d2146300..00000000000 --- a/core/unit_test/config/results/AMDAVX_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,17 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:11 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_AVX 1 diff --git a/core/unit_test/config/results/ARMv8-ThunderX_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/ARMv8-ThunderX_Qthreads_KokkosCore_config.h deleted file mode 100644 index 3865bc4a9a7..00000000000 --- a/core/unit_test/config/results/ARMv8-ThunderX_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,18 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:20 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_ARMV80 1 -#define KOKKOS_ARCH_ARMV8_THUNDERX 1 diff --git a/core/unit_test/config/results/ARMv80_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/ARMv80_Qthreads_KokkosCore_config.h deleted file mode 100644 index 7259a9e9641..00000000000 --- a/core/unit_test/config/results/ARMv80_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,17 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:14 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_ARMV80 1 diff --git a/core/unit_test/config/results/ARMv81_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/ARMv81_Qthreads_KokkosCore_config.h deleted file mode 100644 index e9fc71ad9b6..00000000000 --- a/core/unit_test/config/results/ARMv81_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,17 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:17 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_ARMV81 1 diff --git a/core/unit_test/config/results/BDW_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/BDW_Qthreads_KokkosCore_config.h deleted file mode 100644 index e879e7e1fe9..00000000000 --- a/core/unit_test/config/results/BDW_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,23 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:39 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -#ifndef __CUDA_ARCH__ -#define KOKKOS_ENABLE_TM -#endif -#ifndef __CUDA_ARCH__ -#define KOKKOS_USE_ISA_X86_64 -#endif -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_AVX2 1 diff --git a/core/unit_test/config/results/BGQ_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/BGQ_Qthreads_KokkosCore_config.h deleted file mode 100644 index 60c7ddcdb5e..00000000000 --- a/core/unit_test/config/results/BGQ_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Tue Sep 26 15:19:44 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/HSW_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/HSW_Qthreads_KokkosCore_config.h deleted file mode 100644 index 253dc35bdfd..00000000000 --- a/core/unit_test/config/results/HSW_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,20 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:36 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -#ifndef __CUDA_ARCH__ -#define KOKKOS_USE_ISA_X86_64 -#endif -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_AVX2 1 diff --git a/core/unit_test/config/results/KNC_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/KNC_Qthreads_KokkosCore_config.h deleted file mode 100644 index 5f95a83c272..00000000000 --- a/core/unit_test/config/results/KNC_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,20 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:45 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -#ifndef __CUDA_ARCH__ -#define KOKKOS_USE_ISA_KNC -#endif -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_KNC 1 diff --git a/core/unit_test/config/results/KNL_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/KNL_Qthreads_KokkosCore_config.h deleted file mode 100644 index 6d179d82f8e..00000000000 --- a/core/unit_test/config/results/KNL_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,20 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:48 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -#ifndef __CUDA_ARCH__ -#define KOKKOS_USE_ISA_X86_64 -#endif -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_AVX512MIC 1 diff --git a/core/unit_test/config/results/Kepler30_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Kepler30_Qthreads_KokkosCore_config.h deleted file mode 100644 index 410ba5ea159..00000000000 --- a/core/unit_test/config/results/Kepler30_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:50 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/Kepler32_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Kepler32_Qthreads_KokkosCore_config.h deleted file mode 100644 index f42d0cc5f2f..00000000000 --- a/core/unit_test/config/results/Kepler32_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:53 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/Kepler35_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Kepler35_Qthreads_KokkosCore_config.h deleted file mode 100644 index 429f5e9e28d..00000000000 --- a/core/unit_test/config/results/Kepler35_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:55 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/Kepler37_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Kepler37_Qthreads_KokkosCore_config.h deleted file mode 100644 index 2b8a7f81835..00000000000 --- a/core/unit_test/config/results/Kepler37_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:59 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/Kepler_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Kepler_Qthreads_KokkosCore_config.h deleted file mode 100644 index 021d18c0025..00000000000 --- a/core/unit_test/config/results/Kepler_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Tue Sep 26 15:19:51 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/Maxwell50_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Maxwell50_Qthreads_KokkosCore_config.h deleted file mode 100644 index be1353365c7..00000000000 --- a/core/unit_test/config/results/Maxwell50_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:23:02 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/Maxwell52_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Maxwell52_Qthreads_KokkosCore_config.h deleted file mode 100644 index 58a043c6a3e..00000000000 --- a/core/unit_test/config/results/Maxwell52_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:23:05 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/Maxwell53_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Maxwell53_Qthreads_KokkosCore_config.h deleted file mode 100644 index 96fdbef3dcb..00000000000 --- a/core/unit_test/config/results/Maxwell53_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:23:08 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/Maxwell_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Maxwell_Qthreads_KokkosCore_config.h deleted file mode 100644 index a7f1fd3803b..00000000000 --- a/core/unit_test/config/results/Maxwell_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Tue Sep 26 15:20:01 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/None_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/None_Qthreads_KokkosCore_config.h deleted file mode 100644 index 6bd8addd974..00000000000 --- a/core/unit_test/config/results/None_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Tue Sep 26 15:19:23 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/Pascal60_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Pascal60_Qthreads_KokkosCore_config.h deleted file mode 100644 index c5a2d1d707f..00000000000 --- a/core/unit_test/config/results/Pascal60_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:23:10 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/Pascal61_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Pascal61_Qthreads_KokkosCore_config.h deleted file mode 100644 index 958aac11da7..00000000000 --- a/core/unit_test/config/results/Pascal61_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,16 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:23:13 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ diff --git a/core/unit_test/config/results/Power7_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Power7_Qthreads_KokkosCore_config.h deleted file mode 100644 index dffa8a3f582..00000000000 --- a/core/unit_test/config/results/Power7_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,20 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:22 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -#ifndef __CUDA_ARCH__ -#define KOKKOS_USE_ISA_POWERPCBE -#endif -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_POWER7 1 diff --git a/core/unit_test/config/results/Power8_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Power8_Qthreads_KokkosCore_config.h deleted file mode 100644 index 9da90f4f7e8..00000000000 --- a/core/unit_test/config/results/Power8_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,20 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:25 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -#ifndef __CUDA_ARCH__ -#define KOKKOS_USE_ISA_POWERPCLE -#endif -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_POWER8 1 diff --git a/core/unit_test/config/results/Power9_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/Power9_Qthreads_KokkosCore_config.h deleted file mode 100644 index 5c5be2ed3cb..00000000000 --- a/core/unit_test/config/results/Power9_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,20 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:28 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -#ifndef __CUDA_ARCH__ -#define KOKKOS_USE_ISA_POWERPCLE -#endif -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_POWER9 1 diff --git a/core/unit_test/config/results/SKX_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/SKX_Qthreads_KokkosCore_config.h deleted file mode 100644 index 2e4b1d61ef9..00000000000 --- a/core/unit_test/config/results/SKX_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,23 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:42 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -#ifndef __CUDA_ARCH__ -#define KOKKOS_ENABLE_TM -#endif -#ifndef __CUDA_ARCH__ -#define KOKKOS_USE_ISA_X86_64 -#endif -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_AVX512XEON 1 diff --git a/core/unit_test/config/results/SNB_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/SNB_Qthreads_KokkosCore_config.h deleted file mode 100644 index 2f0216f9c4b..00000000000 --- a/core/unit_test/config/results/SNB_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,20 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:34 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -#ifndef __CUDA_ARCH__ -#define KOKKOS_USE_ISA_X86_64 -#endif -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_AVX 1 diff --git a/core/unit_test/config/results/WSM_Qthreads_KokkosCore_config.h b/core/unit_test/config/results/WSM_Qthreads_KokkosCore_config.h deleted file mode 100644 index d4a78790e39..00000000000 --- a/core/unit_test/config/results/WSM_Qthreads_KokkosCore_config.h +++ /dev/null @@ -1,20 +0,0 @@ -/* --------------------------------------------- -Makefile constructed configuration: -Fri Sep 22 17:22:31 MDT 2017 -----------------------------------------------*/ -#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) -#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." -#else -#define KOKKOS_CORE_CONFIG_H -#endif -/* Execution Spaces */ -#define KOKKOS_HAVE_QTHREADS 1 -#ifndef __CUDA_ARCH__ -#define KOKKOS_USE_ISA_X86_64 -#endif -/* General Settings */ -#define KOKKOS_HAVE_CXX11 1 -#define KOKKOS_ENABLE_PROFILING -/* Optimization Settings */ -/* Cuda Settings */ -#define KOKKOS_ARCH_SSE42 1 diff --git a/core/unit_test/configuration/test-code/test_config_run.bash b/core/unit_test/configuration/test-code/test_config_run.bash index f6b60fa9f09..4750c843c29 100755 --- a/core/unit_test/configuration/test-code/test_config_run.bash +++ b/core/unit_test/configuration/test-code/test_config_run.bash @@ -74,7 +74,7 @@ find cmake/kokkos -name KokkosTargets.cmake -exec grep -h INTERFACE_COMPILE_OPTI #-I flags and -std= flags are not part of CMake's compile options #that's fine, let's ignore thse below -#redunant lines - tail the last one +#redundant lines - tail the last one #awk print each on new line #grep out blank lines #grep out include flags diff --git a/containers/unit_tests/cuda/TestCuda_BitSet.cpp b/core/unit_test/cuda/TestCuda_Concepts.cpp similarity index 98% rename from containers/unit_tests/cuda/TestCuda_BitSet.cpp rename to core/unit_test/cuda/TestCuda_Concepts.cpp index 5306ab38837..31b36cbd8a4 100644 --- a/containers/unit_tests/cuda/TestCuda_BitSet.cpp +++ b/core/unit_test/cuda/TestCuda_Concepts.cpp @@ -1,4 +1,3 @@ - /* //@HEADER // ************************************************************************ @@ -44,4 +43,4 @@ */ #include -#include +#include diff --git a/core/unit_test/cuda/TestCuda_DeepCopyAlignment.cpp b/core/unit_test/cuda/TestCuda_DeepCopyAlignment.cpp index 16793abe082..e8ce7381b4c 100644 --- a/core/unit_test/cuda/TestCuda_DeepCopyAlignment.cpp +++ b/core/unit_test/cuda/TestCuda_DeepCopyAlignment.cpp @@ -43,6 +43,4 @@ */ #include -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) #include -#endif diff --git a/containers/unit_tests/cuda/TestCuda_OffsetView.cpp b/core/unit_test/cuda/TestCuda_RangePolicyRequire.cpp similarity index 98% rename from containers/unit_tests/cuda/TestCuda_OffsetView.cpp rename to core/unit_test/cuda/TestCuda_RangePolicyRequire.cpp index b2e851d0999..1317705399c 100644 --- a/containers/unit_tests/cuda/TestCuda_OffsetView.cpp +++ b/core/unit_test/cuda/TestCuda_RangePolicyRequire.cpp @@ -44,4 +44,4 @@ */ #include -#include +#include diff --git a/core/unit_test/cuda/TestCuda_Spaces.cpp b/core/unit_test/cuda/TestCuda_Spaces.cpp index 800d589e94a..d68ffb08655 100644 --- a/core/unit_test/cuda/TestCuda_Spaces.cpp +++ b/core/unit_test/cuda/TestCuda_Spaces.cpp @@ -280,73 +280,6 @@ TEST(cuda, uvm) { } } -/* Removing UVM Allocs Test due to added time to complete overall unit test - * The issue verified with this unit test appears to no longer be an - * problem. Refer to github issue 1880 for more details - * -TEST( cuda, uvm_num_allocs ) -{ - // The max number of UVM allocations allowed is 65536. - #define MAX_NUM_ALLOCS 65536 - - if ( Kokkos::CudaUVMSpace::available() ) { - struct TestMaxUVMAllocs { - - using view_type = Kokkos::View< double*, Kokkos::CudaUVMSpace >; - using view_of_view_type = Kokkos::View< view_type[ MAX_NUM_ALLOCS ] - , Kokkos::CudaUVMSpace >; - - TestMaxUVMAllocs() : view_allocs_test( "view_allocs_test" ) - { - for ( auto i = 0; i < MAX_NUM_ALLOCS; ++i ) { - - // Kokkos will throw a runtime exception if an attempt is made to - // allocate more than the maximum number of uvm allocations. - - // In this test, the max num of allocs occurs when i = MAX_NUM_ALLOCS -- 1 - // since the 'outer' view counts as one UVM allocation, leaving - // 65535 possible UVM allocations, that is 'i in [0, 65535)'. - - // The test will catch the exception thrown in this case and continue. - - if ( i == ( MAX_NUM_ALLOCS - 1 ) ) { - EXPECT_ANY_THROW( { view_allocs_test( i ) = view_type( "inner_view", -1 ); } ); - } - else { - if ( i < MAX_NUM_ALLOCS - 1000 ) { - EXPECT_NO_THROW( { view_allocs_test( i ) = view_type( -"inner_view", 1 ); } ); } else { // This might or might not throw depending on -compilation options. try { view_allocs_test( i ) = view_type( "inner_view", 1 ); - } - catch ( ... ) {} - } - } - - } // End allocation for loop. - - for ( auto i = 0; i < MAX_NUM_ALLOCS - 1; ++i ) { - - view_allocs_test( i ) = view_type(); - - } // End deallocation for loop. - - view_allocs_test = view_of_view_type(); // Deallocate the view of views. - } - - // Member. - view_of_view_type view_allocs_test; - }; - - // Trigger the test via the TestMaxUVMAllocs constructor. - TestMaxUVMAllocs(); - } - - #undef MAX_NUM_ALLOCS -} -*/ - template struct TestViewCudaAccessible { enum { N = 1000 }; diff --git a/core/unit_test/cuda/TestCuda_Team.cpp b/core/unit_test/cuda/TestCuda_Team.cpp index 93aae95bacc..fe57437989e 100644 --- a/core/unit_test/cuda/TestCuda_Team.cpp +++ b/core/unit_test/cuda/TestCuda_Team.cpp @@ -79,26 +79,100 @@ TEST(TEST_CATEGORY, team_reduce) { Kokkos::Schedule >::test_reduce(1000); } -TEST(TEST_CATEGORY, team_broadcast) { - TestTeamBroadcast >::test_teambroadcast(0); - TestTeamBroadcast >::test_teambroadcast(0); - - TestTeamBroadcast >::test_teambroadcast(2); - TestTeamBroadcast >::test_teambroadcast(2); - - TestTeamBroadcast >::test_teambroadcast(16); - TestTeamBroadcast >::test_teambroadcast(16); - - TestTeamBroadcast >:: - test_teambroadcast(1000); - TestTeamBroadcast >:: - test_teambroadcast(1000); +TEST(TEST_CATEGORY, team_broadcast_long) { + TestTeamBroadcast, + long>::test_teambroadcast(0, 1); + TestTeamBroadcast, + long>::test_teambroadcast(0, 1); + + TestTeamBroadcast, + long>::test_teambroadcast(2, 1); + TestTeamBroadcast, + long>::test_teambroadcast(2, 1); + + TestTeamBroadcast, + long>::test_teambroadcast(16, 1); + TestTeamBroadcast, + long>::test_teambroadcast(16, 1); + + // TestTeamBroadcast, long + // >:: + // test_teambroadcast(1000, 1); + // TestTeamBroadcast, long + // >:: + // test_teambroadcast(1000, 1); +} + +TEST(TEST_CATEGORY, team_broadcast_char) { + TestTeamBroadcast, + unsigned char>::test_teambroadcast(0, 1); + TestTeamBroadcast, + unsigned char>::test_teambroadcast(0, 1); + + TestTeamBroadcast, + unsigned char>::test_teambroadcast(2, 1); + TestTeamBroadcast, + unsigned char>::test_teambroadcast(2, 1); + + TestTeamBroadcast, + unsigned char>::test_teambroadcast(16, 1); + TestTeamBroadcast, + unsigned char>::test_teambroadcast(16, 1); + + // TestTeamBroadcast, long + // >:: + // test_teambroadcast(1000, 1); + // TestTeamBroadcast, long + // >:: + // test_teambroadcast(1000, 1); +} + +TEST(TEST_CATEGORY, team_broadcast_float) { + TestTeamBroadcast, + float>::test_teambroadcast(0, 1.3); + TestTeamBroadcast, + float>::test_teambroadcast(0, 1.3); + + TestTeamBroadcast, + float>::test_teambroadcast(2, 1.3); + TestTeamBroadcast, + float>::test_teambroadcast(2, 1.3); + + TestTeamBroadcast, + float>::test_teambroadcast(16, 1.3); + TestTeamBroadcast, + float>::test_teambroadcast(16, 1.3); + + // TestTeamBroadcast, float + // >:: + // test_teambroadcast(1000, 1.3); + // TestTeamBroadcast, float + // >:: + // test_teambroadcast(1000, 1.3); +} + +TEST(TEST_CATEGORY, team_broadcast_double) { + TestTeamBroadcast, + double>::test_teambroadcast(0, 1.3); + TestTeamBroadcast, + double>::test_teambroadcast(0, 1.3); + + TestTeamBroadcast, + double>::test_teambroadcast(2, 1.3); + TestTeamBroadcast, + double>::test_teambroadcast(2, 1.3); + + TestTeamBroadcast, + double>::test_teambroadcast(16, 1.3); + TestTeamBroadcast, + double>::test_teambroadcast(16, 1.3); + + // TestTeamBroadcast, double + // >:: + // test_teambroadcast(1000, 1.3); + // TestTeamBroadcast, + // double >:: + // test_teambroadcast(1000, 1.3); } } // namespace Test diff --git a/core/unit_test/cuda/TestCuda_TeamScratch.cpp b/core/unit_test/cuda/TestCuda_TeamScratch.cpp index 858bf599b5e..63291dbbb49 100644 --- a/core/unit_test/cuda/TestCuda_TeamScratch.cpp +++ b/core/unit_test/cuda/TestCuda_TeamScratch.cpp @@ -58,7 +58,6 @@ TEST(TEST_CATEGORY, team_scratch_request) { } #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) TEST(TEST_CATEGORY, team_lambda_shared_request) { TestLambdaSharedTeam >(); @@ -68,7 +67,6 @@ TEST(TEST_CATEGORY, team_lambda_shared_request) { TEST(TEST_CATEGORY, scratch_align) { TestScratchAlignment(); } #endif -#endif TEST(TEST_CATEGORY, shmem_size) { TestShmemSize(); } diff --git a/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp b/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp index a15bdbd8d3a..754b90acac4 100644 --- a/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp +++ b/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp @@ -44,3 +44,4 @@ #include #include +#include diff --git a/core/unit_test/default/TestDefaultDeviceType_d.cpp b/core/unit_test/default/TestDefaultDeviceType_d.cpp index 3502e1762f3..ff87b7802cf 100644 --- a/core/unit_test/default/TestDefaultDeviceType_d.cpp +++ b/core/unit_test/default/TestDefaultDeviceType_d.cpp @@ -61,7 +61,7 @@ TEST(defaultdevicetype, malloc) { Kokkos::kokkos_free(data); int* data2 = (int*)Kokkos::kokkos_malloc(0); - ASSERT_TRUE(data2 == NULL); + ASSERT_TRUE(data2 == nullptr); Kokkos::kokkos_free(data2); } diff --git a/core/unit_test/qthreads/TestQthreads_Category.hpp b/core/unit_test/hip/TestHIPHostPinned_Category.hpp similarity index 91% rename from core/unit_test/qthreads/TestQthreads_Category.hpp rename to core/unit_test/hip/TestHIPHostPinned_Category.hpp index fc0a0887004..12c69926c7b 100644 --- a/core/unit_test/qthreads/TestQthreads_Category.hpp +++ b/core/unit_test/hip/TestHIPHostPinned_Category.hpp @@ -42,13 +42,12 @@ //@HEADER */ -#ifndef KOKKOS_TEST_QTHREADS_HPP -#define KOKKOS_TEST_QTHREADS_HPP +#ifndef KOKKOS_TEST_HIPHOSTPINNED_HPP +#define KOKKOS_TEST_HIPHOSTPINNED_HPP #include -#define TEST_CATEGORY qthreads -#define TEST_CATEGORY_DEATH qthreads_DeathTest -#define TEST_EXECSPACE Kokkos::Qthreads +#define TEST_CATEGORY hip_hostpinned +#define TEST_EXECSPACE Kokkos::Experimental::HIPHostPinnedSpace #endif diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c10.cpp b/core/unit_test/hip/TestHIPHostPinned_SharedAlloc.cpp similarity index 91% rename from core/unit_test/qthreads/TestQthreads_SubView_c10.cpp rename to core/unit_test/hip/TestHIPHostPinned_SharedAlloc.cpp index e42c464020d..7b2db4f8bcc 100644 --- a/core/unit_test/qthreads/TestQthreads_SubView_c10.cpp +++ b/core/unit_test/hip/TestHIPHostPinned_SharedAlloc.cpp @@ -42,14 +42,13 @@ //@HEADER */ -#include +#include +#include namespace Test { -TEST_F(qthreads, view_subview_3d_from_5d_right) { -#if 0 - TestViewSubview::test_3d_subview_5d_right< Kokkos::Qthreads >(); -#endif +TEST(TEST_CATEGORY, impl_shared_alloc) { + test_shared_alloc(); } } // namespace Test diff --git a/core/unit_test/qthreads/TestQqthreads_ViewAPI_a.cpp b/core/unit_test/hip/TestHIPHostPinned_ViewAPI_a.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_ViewAPI_a.cpp rename to core/unit_test/hip/TestHIPHostPinned_ViewAPI_a.cpp index 4339c1b5a02..53f0371b23d 100644 --- a/core/unit_test/qthreads/TestQqthreads_ViewAPI_a.cpp +++ b/core/unit_test/hip/TestHIPHostPinned_ViewAPI_a.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_ViewAPI_b.cpp b/core/unit_test/hip/TestHIPHostPinned_ViewAPI_b.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_ViewAPI_b.cpp rename to core/unit_test/hip/TestHIPHostPinned_ViewAPI_b.cpp index bcbef94ab7e..2e3685d6102 100644 --- a/core/unit_test/qthreads/TestQqthreads_ViewAPI_b.cpp +++ b/core/unit_test/hip/TestHIPHostPinned_ViewAPI_b.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_ViewAPI_c.cpp b/core/unit_test/hip/TestHIPHostPinned_ViewAPI_c.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_ViewAPI_c.cpp rename to core/unit_test/hip/TestHIPHostPinned_ViewAPI_c.cpp index 21c9fd890cf..079a244d43e 100644 --- a/core/unit_test/qthreads/TestQqthreads_ViewAPI_c.cpp +++ b/core/unit_test/hip/TestHIPHostPinned_ViewAPI_c.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_ViewAPI_d.cpp b/core/unit_test/hip/TestHIPHostPinned_ViewAPI_d.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_ViewAPI_d.cpp rename to core/unit_test/hip/TestHIPHostPinned_ViewAPI_d.cpp index ef4bddc5725..bc3843b2c1d 100644 --- a/core/unit_test/qthreads/TestQqthreads_ViewAPI_d.cpp +++ b/core/unit_test/hip/TestHIPHostPinned_ViewAPI_d.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_ViewAPI_e.cpp b/core/unit_test/hip/TestHIPHostPinned_ViewAPI_e.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_ViewAPI_e.cpp rename to core/unit_test/hip/TestHIPHostPinned_ViewAPI_e.cpp index bba5916a5ab..1c80e05fe0f 100644 --- a/core/unit_test/qthreads/TestQqthreads_ViewAPI_e.cpp +++ b/core/unit_test/hip/TestHIPHostPinned_ViewAPI_e.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/containers/unit_tests/cuda/TestCuda_DynamicView.cpp b/core/unit_test/hip/TestHIPHostPinned_ViewCopy.cpp similarity index 96% rename from containers/unit_tests/cuda/TestCuda_DynamicView.cpp rename to core/unit_test/hip/TestHIPHostPinned_ViewCopy.cpp index 9eccb05a25d..59674ad22f6 100644 --- a/containers/unit_tests/cuda/TestCuda_DynamicView.cpp +++ b/core/unit_test/hip/TestHIPHostPinned_ViewCopy.cpp @@ -1,4 +1,3 @@ - /* //@HEADER // ************************************************************************ @@ -43,5 +42,5 @@ //@HEADER */ -#include -#include +#include +#include diff --git a/containers/unit_tests/cuda/TestCuda_DynRankViewAPI_rank12345.cpp b/core/unit_test/hip/TestHIPHostPinned_ViewMapping_a.cpp similarity index 96% rename from containers/unit_tests/cuda/TestCuda_DynRankViewAPI_rank12345.cpp rename to core/unit_test/hip/TestHIPHostPinned_ViewMapping_a.cpp index 66d2d17e8b4..f9b5608d1b2 100644 --- a/containers/unit_tests/cuda/TestCuda_DynRankViewAPI_rank12345.cpp +++ b/core/unit_test/hip/TestHIPHostPinned_ViewMapping_a.cpp @@ -1,4 +1,3 @@ - /* //@HEADER // ************************************************************************ @@ -43,5 +42,5 @@ //@HEADER */ -#include -#include +#include +#include diff --git a/containers/unit_tests/cuda/TestCuda_DynRankViewAPI_generic.cpp b/core/unit_test/hip/TestHIPHostPinned_ViewMapping_b.cpp similarity index 96% rename from containers/unit_tests/cuda/TestCuda_DynRankViewAPI_generic.cpp rename to core/unit_test/hip/TestHIPHostPinned_ViewMapping_b.cpp index 609c8b41a53..bff68fc0725 100644 --- a/containers/unit_tests/cuda/TestCuda_DynRankViewAPI_generic.cpp +++ b/core/unit_test/hip/TestHIPHostPinned_ViewMapping_b.cpp @@ -1,4 +1,3 @@ - /* //@HEADER // ************************************************************************ @@ -43,5 +42,5 @@ //@HEADER */ -#include -#include +#include +#include diff --git a/core/unit_test/hip/TestHIPHostPinned_ViewMapping_subview.cpp b/core/unit_test/hip/TestHIPHostPinned_ViewMapping_subview.cpp new file mode 100644 index 00000000000..cfa9da54959 --- /dev/null +++ b/core/unit_test/hip/TestHIPHostPinned_ViewMapping_subview.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_complexdouble.cpp b/core/unit_test/hip/TestHIP_AtomicOperations_complexdouble.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_AtomicOperations_complexdouble.cpp rename to core/unit_test/hip/TestHIP_AtomicOperations_complexdouble.cpp index 9c6589539a9..644a825bfce 100644 --- a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_complexdouble.cpp +++ b/core/unit_test/hip/TestHIP_AtomicOperations_complexdouble.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_complexfloat.cpp b/core/unit_test/hip/TestHIP_AtomicOperations_complexfloat.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_AtomicOperations_complexfloat.cpp rename to core/unit_test/hip/TestHIP_AtomicOperations_complexfloat.cpp index 1163da7285d..f8aaed88cba 100644 --- a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_complexfloat.cpp +++ b/core/unit_test/hip/TestHIP_AtomicOperations_complexfloat.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_double.cpp b/core/unit_test/hip/TestHIP_AtomicOperations_double.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_AtomicOperations_double.cpp rename to core/unit_test/hip/TestHIP_AtomicOperations_double.cpp index aee8ccde7fc..9adeaa76ba0 100644 --- a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_double.cpp +++ b/core/unit_test/hip/TestHIP_AtomicOperations_double.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_float.cpp b/core/unit_test/hip/TestHIP_AtomicOperations_float.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_AtomicOperations_float.cpp rename to core/unit_test/hip/TestHIP_AtomicOperations_float.cpp index 0bfa8d43d6a..ba7c6d9bc3a 100644 --- a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_float.cpp +++ b/core/unit_test/hip/TestHIP_AtomicOperations_float.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_int.cpp b/core/unit_test/hip/TestHIP_AtomicOperations_int.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_AtomicOperations_int.cpp rename to core/unit_test/hip/TestHIP_AtomicOperations_int.cpp index 8900daf81ea..840f2dbd061 100644 --- a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_int.cpp +++ b/core/unit_test/hip/TestHIP_AtomicOperations_int.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_longint.cpp b/core/unit_test/hip/TestHIP_AtomicOperations_longint.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_AtomicOperations_longint.cpp rename to core/unit_test/hip/TestHIP_AtomicOperations_longint.cpp index 9dcff923128..be305076636 100644 --- a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_longint.cpp +++ b/core/unit_test/hip/TestHIP_AtomicOperations_longint.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_longlongint.cpp b/core/unit_test/hip/TestHIP_AtomicOperations_longlongint.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_AtomicOperations_longlongint.cpp rename to core/unit_test/hip/TestHIP_AtomicOperations_longlongint.cpp index 0f5a0b7df74..85f54dee141 100644 --- a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_longlongint.cpp +++ b/core/unit_test/hip/TestHIP_AtomicOperations_longlongint.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_unsignedint.cpp b/core/unit_test/hip/TestHIP_AtomicOperations_unsignedint.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_AtomicOperations_unsignedint.cpp rename to core/unit_test/hip/TestHIP_AtomicOperations_unsignedint.cpp index eee44e84a32..d738b608c29 100644 --- a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_unsignedint.cpp +++ b/core/unit_test/hip/TestHIP_AtomicOperations_unsignedint.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_unsignedlongint.cpp b/core/unit_test/hip/TestHIP_AtomicOperations_unsignedlongint.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_AtomicOperations_unsignedlongint.cpp rename to core/unit_test/hip/TestHIP_AtomicOperations_unsignedlongint.cpp index 9379a6efa87..ab25f5d142a 100644 --- a/core/unit_test/qthreads/TestQqthreads_AtomicOperations_unsignedlongint.cpp +++ b/core/unit_test/hip/TestHIP_AtomicOperations_unsignedlongint.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/containers/unit_tests/cuda/TestCuda_DualView.cpp b/core/unit_test/hip/TestHIP_AtomicViews.cpp similarity index 96% rename from containers/unit_tests/cuda/TestCuda_DualView.cpp rename to core/unit_test/hip/TestHIP_AtomicViews.cpp index 5641966db43..0dae0cb9ebb 100644 --- a/containers/unit_tests/cuda/TestCuda_DualView.cpp +++ b/core/unit_test/hip/TestHIP_AtomicViews.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include -#include +#include +#include diff --git a/core/unit_test/hip/TestHIP_Atomics.cpp b/core/unit_test/hip/TestHIP_Atomics.cpp new file mode 100644 index 00000000000..dda1388237d --- /dev/null +++ b/core/unit_test/hip/TestHIP_Atomics.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_Category.hpp b/core/unit_test/hip/TestHIP_Category.hpp new file mode 100644 index 00000000000..8cae165c337 --- /dev/null +++ b/core/unit_test/hip/TestHIP_Category.hpp @@ -0,0 +1,53 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_HIP_HPP +#define KOKKOS_TEST_HIP_HPP + +#include + +#define TEST_CATEGORY hip +#define TEST_EXECSPACE Kokkos::Experimental::HIP + +#endif diff --git a/containers/unit_tests/cuda/TestCuda_Vector.cpp b/core/unit_test/hip/TestHIP_Complex.cpp similarity index 96% rename from containers/unit_tests/cuda/TestCuda_Vector.cpp rename to core/unit_test/hip/TestHIP_Complex.cpp index 408d0c76fc2..f628fb834f1 100644 --- a/containers/unit_tests/cuda/TestCuda_Vector.cpp +++ b/core/unit_test/hip/TestHIP_Complex.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include -#include +#include +#include diff --git a/core/unit_test/hip/TestHIP_Concepts.cpp b/core/unit_test/hip/TestHIP_Concepts.cpp new file mode 100644 index 00000000000..bfdfda15dbe --- /dev/null +++ b/core/unit_test/hip/TestHIP_Concepts.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_Crs.cpp b/core/unit_test/hip/TestHIP_Crs.cpp new file mode 100644 index 00000000000..b01b787743a --- /dev/null +++ b/core/unit_test/hip/TestHIP_Crs.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/qthreads/TestQthreads_DeepCopyAlignment.cpp b/core/unit_test/hip/TestHIP_DeepCopyAlignment.cpp similarity index 97% rename from core/unit_test/qthreads/TestQthreads_DeepCopyAlignment.cpp rename to core/unit_test/hip/TestHIP_DeepCopyAlignment.cpp index edfaa08d29e..43f9d4b5c30 100644 --- a/core/unit_test/qthreads/TestQthreads_DeepCopyAlignment.cpp +++ b/core/unit_test/hip/TestHIP_DeepCopyAlignment.cpp @@ -42,5 +42,5 @@ //@HEADER */ -#include +#include #include diff --git a/containers/unit_tests/cuda/TestCuda_ErrorReporter.cpp b/core/unit_test/hip/TestHIP_FunctorAnalysis.cpp similarity index 96% rename from containers/unit_tests/cuda/TestCuda_ErrorReporter.cpp rename to core/unit_test/hip/TestHIP_FunctorAnalysis.cpp index 806a3c6eccb..f3ae5f47ca9 100644 --- a/containers/unit_tests/cuda/TestCuda_ErrorReporter.cpp +++ b/core/unit_test/hip/TestHIP_FunctorAnalysis.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include -#include +#include +#include diff --git a/core/unit_test/hip/TestHIP_Init.cpp b/core/unit_test/hip/TestHIP_Init.cpp new file mode 100644 index 00000000000..16a73293b37 --- /dev/null +++ b/core/unit_test/hip/TestHIP_Init.cpp @@ -0,0 +1,49 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include diff --git a/core/unit_test/hip/TestHIP_InterOp_Init.cpp b/core/unit_test/hip/TestHIP_InterOp_Init.cpp new file mode 100644 index 00000000000..0dc279fc78a --- /dev/null +++ b/core/unit_test/hip/TestHIP_InterOp_Init.cpp @@ -0,0 +1,85 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +__global__ void offset(int* p) { + int idx = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; + if (idx < 100) { + p[idx] += idx; + } +} + +// Test whether allocations survive Kokkos initialize/finalize if done via Raw +// HIP. +TEST(hip, raw_hip_interop) { + int* p; + hipMalloc(&p, sizeof(int) * 100); + Kokkos::InitArguments arguments{-1, -1, -1, false}; + Kokkos::initialize(arguments); + + Kokkos::View> v(p, 100); + Kokkos::deep_copy(v, 5); + + Kokkos::finalize(); + + hipLaunchKernelGGL(offset, dim3(100), dim3(100), 0, 0, p); + HIP_SAFE_CALL(hipDeviceSynchronize()); + + int* h_p = new int[100]; + hipMemcpy(h_p, p, sizeof(int) * 100, hipMemcpyDefault); + HIP_SAFE_CALL(hipDeviceSynchronize()); + int64_t sum = 0; + int64_t sum_expect = 0; + for (int i = 0; i < 100; i++) { + sum += h_p[i]; + sum_expect += 5 + i; + } + + ASSERT_EQ(sum, sum_expect); +} +} // namespace Test diff --git a/core/unit_test/hip/TestHIP_LocalDeepCopy.cpp b/core/unit_test/hip/TestHIP_LocalDeepCopy.cpp new file mode 100644 index 00000000000..4fc7ca680b1 --- /dev/null +++ b/core/unit_test/hip/TestHIP_LocalDeepCopy.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/qthreads/TestQqthreads_MDRange_a.cpp b/core/unit_test/hip/TestHIP_MDRange_a.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_MDRange_a.cpp rename to core/unit_test/hip/TestHIP_MDRange_a.cpp index 7067d841e3e..6bf23f1b2c2 100644 --- a/core/unit_test/qthreads/TestQqthreads_MDRange_a.cpp +++ b/core/unit_test/hip/TestHIP_MDRange_a.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_MDRange_b.cpp b/core/unit_test/hip/TestHIP_MDRange_b.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_MDRange_b.cpp rename to core/unit_test/hip/TestHIP_MDRange_b.cpp index 1f4ba9c984f..d820fa02c9f 100644 --- a/core/unit_test/qthreads/TestQqthreads_MDRange_b.cpp +++ b/core/unit_test/hip/TestHIP_MDRange_b.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_MDRange_c.cpp b/core/unit_test/hip/TestHIP_MDRange_c.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_MDRange_c.cpp rename to core/unit_test/hip/TestHIP_MDRange_c.cpp index 37b4671d055..5c70cf6c6ce 100644 --- a/core/unit_test/qthreads/TestQqthreads_MDRange_c.cpp +++ b/core/unit_test/hip/TestHIP_MDRange_c.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_MDRange_d.cpp b/core/unit_test/hip/TestHIP_MDRange_d.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_MDRange_d.cpp rename to core/unit_test/hip/TestHIP_MDRange_d.cpp index 2bd1fbbfd51..a18fc24d118 100644 --- a/core/unit_test/qthreads/TestQqthreads_MDRange_d.cpp +++ b/core/unit_test/hip/TestHIP_MDRange_d.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include +#include #include diff --git a/core/unit_test/qthreads/TestQqthreads_MDRange_e.cpp b/core/unit_test/hip/TestHIP_MDRange_e.cpp similarity index 97% rename from core/unit_test/qthreads/TestQqthreads_MDRange_e.cpp rename to core/unit_test/hip/TestHIP_MDRange_e.cpp index 659dd23d7fd..fe011213d3c 100644 --- a/core/unit_test/qthreads/TestQqthreads_MDRange_e.cpp +++ b/core/unit_test/hip/TestHIP_MDRange_e.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include +#include #include diff --git a/containers/unit_tests/hpx/TestHPX_Vector.cpp b/core/unit_test/hip/TestHIP_RangePolicy.cpp similarity index 96% rename from containers/unit_tests/hpx/TestHPX_Vector.cpp rename to core/unit_test/hip/TestHIP_RangePolicy.cpp index d42cef4b288..5c874182099 100644 --- a/containers/unit_tests/hpx/TestHPX_Vector.cpp +++ b/core/unit_test/hip/TestHIP_RangePolicy.cpp @@ -43,5 +43,5 @@ //@HEADER */ -#include -#include +#include +#include diff --git a/core/unit_test/hip/TestHIP_RangePolicyRequire.cpp b/core/unit_test/hip/TestHIP_RangePolicyRequire.cpp new file mode 100644 index 00000000000..6cf9a36f9fd --- /dev/null +++ b/core/unit_test/hip/TestHIP_RangePolicyRequire.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_Reducers_a.cpp b/core/unit_test/hip/TestHIP_Reducers_a.cpp new file mode 100644 index 00000000000..ec9331d50cf --- /dev/null +++ b/core/unit_test/hip/TestHIP_Reducers_a.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_Reducers_b.cpp b/core/unit_test/hip/TestHIP_Reducers_b.cpp new file mode 100644 index 00000000000..c51c76ae2f2 --- /dev/null +++ b/core/unit_test/hip/TestHIP_Reducers_b.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_Reducers_c.cpp b/core/unit_test/hip/TestHIP_Reducers_c.cpp new file mode 100644 index 00000000000..42c21390a2c --- /dev/null +++ b/core/unit_test/hip/TestHIP_Reducers_c.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_Reducers_d.cpp b/core/unit_test/hip/TestHIP_Reducers_d.cpp new file mode 100644 index 00000000000..6014405c376 --- /dev/null +++ b/core/unit_test/hip/TestHIP_Reducers_d.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_Reductions.cpp b/core/unit_test/hip/TestHIP_Reductions.cpp new file mode 100644 index 00000000000..8bf8cc63aa2 --- /dev/null +++ b/core/unit_test/hip/TestHIP_Reductions.cpp @@ -0,0 +1,47 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include diff --git a/core/unit_test/hip/TestHIP_Scan.cpp b/core/unit_test/hip/TestHIP_Scan.cpp new file mode 100644 index 00000000000..8dc06c70605 --- /dev/null +++ b/core/unit_test/hip/TestHIP_Scan.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/qthreads/TestQthreads_SubView_a.cpp b/core/unit_test/hip/TestHIP_ScanUnit.cpp similarity index 56% rename from core/unit_test/qthreads/TestQthreads_SubView_a.cpp rename to core/unit_test/hip/TestHIP_ScanUnit.cpp index f3d3396520a..ea38596883f 100644 --- a/core/unit_test/qthreads/TestQthreads_SubView_a.cpp +++ b/core/unit_test/hip/TestHIP_ScanUnit.cpp @@ -1,3 +1,4 @@ + /* //@HEADER // ************************************************************************ @@ -42,74 +43,55 @@ //@HEADER */ -#include - -namespace Test { - -TEST_F(qthreads, view_subview_auto_1d_left) { -#if 0 - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Qthreads >(); -#endif -} - -TEST_F(qthreads, view_subview_auto_1d_right) { -#if 0 - TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Qthreads >(); -#endif -} - -TEST_F(qthreads, view_subview_auto_1d_stride) { -#if 0 - TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Qthreads >(); -#endif -} - -TEST_F(qthreads, view_subview_assign_strided) { -#if 0 - TestViewSubview::test_1d_strided_assignment< Kokkos::Qthreads >(); -#endif -} - -TEST_F(qthreads, view_subview_left_0) { -#if 0 - TestViewSubview::test_left_0< Kokkos::Qthreads >(); -#endif -} +#include +#include -TEST_F(qthreads, view_subview_left_1) { -#if 0 - TestViewSubview::test_left_1< Kokkos::Qthreads >(); -#endif -} +struct DummyFunctor { + using value_type = int; + void operator()(const int, value_type &, bool) const {} +}; -TEST_F(qthreads, view_subview_left_2) { -#if 0 - TestViewSubview::test_left_2< Kokkos::Qthreads >(); -#endif -} +template +__global__ void start_intra_block_scan() { + __shared__ DummyFunctor::value_type values[N]; + const int i = hipThreadIdx_y; + values[i] = i + 1; + __syncthreads(); -TEST_F(qthreads, view_subview_left_3) { -#if 0 - TestViewSubview::test_left_3< Kokkos::Qthreads >(); -#endif -} + DummyFunctor f; + Kokkos::Impl::hip_intra_block_reduce_scan(f, + values); -TEST_F(qthreads, view_subview_right_0) { -#if 0 - TestViewSubview::test_right_0< Kokkos::Qthreads >(); -#endif + __syncthreads(); + if (values[i] != ((i + 2) * (i + 1)) / 2) { + printf("Value for %d should be %d but is %d\n", i, ((i + 2) * (i + 1)) / 2, + values[i]); + Kokkos::abort("Test for intra_block_reduce_scan failed!"); + } } -TEST_F(qthreads, view_subview_right_1) { -#if 0 - TestViewSubview::test_right_1< Kokkos::Qthreads >(); -#endif +template +void test_intra_block_scan() { + dim3 grid(1, 1, 1); + dim3 block(1, N, 1); + hipLaunchKernelGGL(start_intra_block_scan, grid, block, 0, 0); } -TEST_F(qthreads, view_subview_right_3) { -#if 0 - TestViewSubview::test_right_3< Kokkos::Qthreads >(); -#endif +TEST(TEST_CATEGORY, scan_unit) { + if (std::is_same< + TEST_EXECSPACE, + typename Kokkos::Experimental::HIPSpace::execution_space>::value) { + test_intra_block_scan<1>(); + test_intra_block_scan<2>(); + test_intra_block_scan<4>(); + test_intra_block_scan<8>(); + test_intra_block_scan<16>(); + test_intra_block_scan<32>(); + test_intra_block_scan<64>(); + test_intra_block_scan<128>(); + test_intra_block_scan<256>(); + // FIXME_HIP block sizes larger than 256 give wrong results. + // test_intra_block_scan<512>(); + // test_intra_block_scan<1024>(); + } } - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c03.cpp b/core/unit_test/hip/TestHIP_SharedAlloc.cpp similarity index 90% rename from core/unit_test/qthreads/TestQthreads_SubView_c03.cpp rename to core/unit_test/hip/TestHIP_SharedAlloc.cpp index 89b68ae42c1..c66e01fe0a0 100644 --- a/core/unit_test/qthreads/TestQthreads_SubView_c03.cpp +++ b/core/unit_test/hip/TestHIP_SharedAlloc.cpp @@ -42,14 +42,14 @@ //@HEADER */ -#include +#include +#include namespace Test { -TEST_F(qthreads, view_subview_1d_assign_randomaccess) { -#if 0 - TestViewSubview::test_1d_assign< Kokkos::Qthreads, Kokkos::MemoryTraits >(); -#endif +TEST(TEST_CATEGORY, impl_shared_alloc) { + test_shared_alloc(); } } // namespace Test diff --git a/core/unit_test/hip/TestHIP_Spaces.cpp b/core/unit_test/hip/TestHIP_Spaces.cpp new file mode 100644 index 00000000000..f13400d096d --- /dev/null +++ b/core/unit_test/hip/TestHIP_Spaces.cpp @@ -0,0 +1,233 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +__global__ void test_abort() { Kokkos::abort("test_abort"); } + +__global__ void test_hip_spaces_int_value(int *ptr) { + if (*ptr == 42) { + *ptr = 2 * 42; + } +} + +TEST(hip, space_access) { + static_assert(Kokkos::Impl::MemorySpaceAccess::assignable, + ""); + + static_assert(Kokkos::Impl::MemorySpaceAccess< + Kokkos::HostSpace, + Kokkos::Experimental::HIPHostPinnedSpace>::assignable, + ""); + + static_assert( + !Kokkos::Impl::MemorySpaceAccess< + Kokkos::HostSpace, Kokkos::Experimental::HIPSpace>::assignable, + ""); + + static_assert( + !Kokkos::Impl::MemorySpaceAccess< + Kokkos::HostSpace, Kokkos::Experimental::HIPSpace>::accessible, + ""); + + //-------------------------------------- + + static_assert(Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPSpace, + Kokkos::Experimental::HIPSpace>::assignable, + ""); + + static_assert(!Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPSpace, + Kokkos::Experimental::HIPHostPinnedSpace>::assignable, + ""); + + static_assert(Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPSpace, + Kokkos::Experimental::HIPHostPinnedSpace>::accessible, + ""); + + static_assert(!Kokkos::Impl::MemorySpaceAccess::assignable, + ""); + + static_assert(!Kokkos::Impl::MemorySpaceAccess::accessible, + ""); + + //-------------------------------------- + + static_assert(Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPHostPinnedSpace, + Kokkos::Experimental::HIPHostPinnedSpace>::assignable, + ""); + + static_assert( + !Kokkos::Impl::MemorySpaceAccess::assignable, + ""); + + static_assert( + Kokkos::Impl::MemorySpaceAccess::accessible, + ""); + + static_assert(!Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPHostPinnedSpace, + Kokkos::Experimental::HIPSpace>::assignable, + ""); + + static_assert(!Kokkos::Impl::MemorySpaceAccess< + Kokkos::Experimental::HIPHostPinnedSpace, + Kokkos::Experimental::HIPSpace>::accessible, + ""); + + //-------------------------------------- + + static_assert( + !Kokkos::Impl::SpaceAccessibility::accessible, + ""); + + static_assert(Kokkos::Impl::SpaceAccessibility< + Kokkos::Experimental::HIP, + Kokkos::Experimental::HIPSpace>::accessible, + ""); + + static_assert(Kokkos::Impl::SpaceAccessibility< + Kokkos::Experimental::HIP, + Kokkos::Experimental::HIPHostPinnedSpace>::accessible, + ""); + + static_assert( + !Kokkos::Impl::SpaceAccessibility< + Kokkos::HostSpace, Kokkos::Experimental::HIPSpace>::accessible, + ""); + + static_assert(Kokkos::Impl::SpaceAccessibility< + Kokkos::HostSpace, + Kokkos::Experimental::HIPHostPinnedSpace>::accessible, + ""); + + static_assert( + std::is_same< + Kokkos::Impl::HostMirror::Space, + Kokkos::HostSpace>::value, + ""); + + static_assert( + std::is_same::Space, + Kokkos::Experimental::HIPHostPinnedSpace>::value, + ""); + + static_assert(Kokkos::Impl::SpaceAccessibility< + Kokkos::Impl::HostMirror::Space, + Kokkos::HostSpace>::accessible, + ""); + + static_assert( + Kokkos::Impl::SpaceAccessibility< + Kokkos::Impl::HostMirror::Space, + Kokkos::HostSpace>::accessible, + ""); + + static_assert(Kokkos::Impl::SpaceAccessibility< + Kokkos::Impl::HostMirror< + Kokkos::Experimental::HIPHostPinnedSpace>::Space, + Kokkos::HostSpace>::accessible, + ""); +} + +template +struct TestViewHIPAccessible { + enum { N = 1000 }; + + using V = Kokkos::View; + + V m_base; + + struct TagInit {}; + struct TagTest {}; + + KOKKOS_INLINE_FUNCTION + void operator()(const TagInit &, const int i) const { m_base[i] = i + 1; } + + KOKKOS_INLINE_FUNCTION + void operator()(const TagTest &, const int i, long &error_count) const { + if (m_base[i] != i + 1) ++error_count; + } + + TestViewHIPAccessible() : m_base("base", N) {} + + static void run() { + TestViewHIPAccessible self; + Kokkos::parallel_for( + Kokkos::RangePolicy(0, N), + self); + typename MemSpace::execution_space().fence(); + + // Next access is a different execution space, must complete prior kernel. + long error_count = -1; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, N), self, + error_count); + EXPECT_EQ(error_count, 0); + } +}; + +TEST(hip, impl_view_accessible) { + TestViewHIPAccessible::run(); + + TestViewHIPAccessible::run(); + TestViewHIPAccessible::run(); +} + +} // namespace Test diff --git a/core/unit_test/hip/TestHIP_SubView_a.cpp b/core/unit_test/hip/TestHIP_SubView_a.cpp new file mode 100644 index 00000000000..df7b474afd8 --- /dev/null +++ b/core/unit_test/hip/TestHIP_SubView_a.cpp @@ -0,0 +1,102 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +TEST(TEST_CATEGORY, view_subview_auto_1d_left) { + TestViewSubview::test_auto_1d(); +} + +TEST(TEST_CATEGORY, view_subview_auto_1d_right) { + TestViewSubview::test_auto_1d(); +} + +TEST(TEST_CATEGORY, view_subview_auto_1d_stride) { + TestViewSubview::test_auto_1d(); +} + +TEST(TEST_CATEGORY, view_subview_assign_strided) { + TestViewSubview::test_1d_strided_assignment(); +} + +TEST(TEST_CATEGORY, view_subview_left_0) { + TestViewSubview::test_left_0(); +} + +TEST(TEST_CATEGORY, view_subview_left_1) { + TestViewSubview::test_left_1(); +} + +TEST(TEST_CATEGORY, view_subview_left_2) { + TestViewSubview::test_left_2(); +} + +TEST(TEST_CATEGORY, view_subview_left_3) { + TestViewSubview::test_left_3(); +} + +TEST(TEST_CATEGORY, view_subview_right_0) { + TestViewSubview::test_right_0(); +} + +TEST(TEST_CATEGORY, view_subview_right_1) { + TestViewSubview::test_right_1(); +} + +TEST(TEST_CATEGORY, view_subview_right_3) { + TestViewSubview::test_right_3(); +} + +TEST(TEST_CATEGORY, view_static_tests) { + TestViewSubview::TestSubviewStaticSizes()(); + TestViewSubview::TestSubviewStaticSizes()(); + TestViewSubview::TestExtentsStaticTests(); +} + +} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_b.cpp b/core/unit_test/hip/TestHIP_SubView_b.cpp similarity index 71% rename from core/unit_test/qthreads/TestQthreads_SubView_b.cpp rename to core/unit_test/hip/TestHIP_SubView_b.cpp index 6ef5dc4cf12..5fdaefcf21e 100644 --- a/core/unit_test/qthreads/TestQthreads_SubView_b.cpp +++ b/core/unit_test/hip/TestHIP_SubView_b.cpp @@ -42,24 +42,25 @@ //@HEADER */ -#include +#include +#include namespace Test { -TEST_F(qthreads, view_subview_layoutleft_to_layoutleft) { -#if 0 - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Qthreads >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Qthreads, Kokkos::MemoryTraits >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Qthreads, Kokkos::MemoryTraits >(); -#endif +TEST(TEST_CATEGORY, view_subview_layoutleft_to_layoutleft) { + TestViewSubview::test_layoutleft_to_layoutleft(); + TestViewSubview::test_layoutleft_to_layoutleft< + TEST_EXECSPACE, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutleft_to_layoutleft< + TEST_EXECSPACE, Kokkos::MemoryTraits >(); } -TEST_F(qthreads, view_subview_layoutright_to_layoutright) { -#if 0 - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Qthreads >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Qthreads, Kokkos::MemoryTraits >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Qthreads, Kokkos::MemoryTraits >(); -#endif +TEST(TEST_CATEGORY, view_subview_layoutright_to_layoutright) { + TestViewSubview::test_layoutright_to_layoutright(); + TestViewSubview::test_layoutright_to_layoutright< + TEST_EXECSPACE, Kokkos::MemoryTraits >(); + TestViewSubview::test_layoutright_to_layoutright< + TEST_EXECSPACE, Kokkos::MemoryTraits >(); } } // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c07.cpp b/core/unit_test/hip/TestHIP_SubView_c01.cpp similarity index 91% rename from core/unit_test/qthreads/TestQthreads_SubView_c07.cpp rename to core/unit_test/hip/TestHIP_SubView_c01.cpp index 5857f67d19d..79bb42bc596 100644 --- a/core/unit_test/qthreads/TestQthreads_SubView_c07.cpp +++ b/core/unit_test/hip/TestHIP_SubView_c01.cpp @@ -42,14 +42,13 @@ //@HEADER */ -#include +#include +#include namespace Test { -TEST_F(qthreads, view_subview_3d_from_5d_left) { -#if 0 - TestViewSubview::test_3d_subview_5d_left< Kokkos::Qthreads >(); -#endif +TEST(TEST_CATEGORY, view_subview_1d_assign) { + TestViewSubview::test_1d_assign(); } } // namespace Test diff --git a/core/unit_test/hip/TestHIP_SubView_c02.cpp b/core/unit_test/hip/TestHIP_SubView_c02.cpp new file mode 100644 index 00000000000..c38dee484e3 --- /dev/null +++ b/core/unit_test/hip/TestHIP_SubView_c02.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +TEST(TEST_CATEGORY, view_subview_1d_assign_atomic) { + TestViewSubview::test_1d_assign >(); +} + +} // namespace Test diff --git a/core/unit_test/hip/TestHIP_SubView_c03.cpp b/core/unit_test/hip/TestHIP_SubView_c03.cpp new file mode 100644 index 00000000000..c35f786d38f --- /dev/null +++ b/core/unit_test/hip/TestHIP_SubView_c03.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +TEST(TEST_CATEGORY, view_subview_1d_assign_randomaccess) { + TestViewSubview::test_1d_assign< + TEST_EXECSPACE, Kokkos::MemoryTraits >(); +} + +} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c04.cpp b/core/unit_test/hip/TestHIP_SubView_c04.cpp similarity index 91% rename from core/unit_test/qthreads/TestQthreads_SubView_c04.cpp rename to core/unit_test/hip/TestHIP_SubView_c04.cpp index 0ee4d6887c9..3a0cafdbe11 100644 --- a/core/unit_test/qthreads/TestQthreads_SubView_c04.cpp +++ b/core/unit_test/hip/TestHIP_SubView_c04.cpp @@ -42,14 +42,13 @@ //@HEADER */ -#include +#include +#include namespace Test { -TEST_F(qthreads, view_subview_2d_from_3d) { -#if 0 - TestViewSubview::test_2d_subview_3d< Kokkos::Qthreads >(); -#endif +TEST(TEST_CATEGORY, view_subview_2d_from_3d) { + TestViewSubview::test_2d_subview_3d(); } } // namespace Test diff --git a/core/unit_test/hip/TestHIP_SubView_c05.cpp b/core/unit_test/hip/TestHIP_SubView_c05.cpp new file mode 100644 index 00000000000..7b3710f428e --- /dev/null +++ b/core/unit_test/hip/TestHIP_SubView_c05.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +TEST(TEST_CATEGORY, view_subview_2d_from_3d_atomic) { + TestViewSubview::test_2d_subview_3d >(); +} + +} // namespace Test diff --git a/core/unit_test/hip/TestHIP_SubView_c06.cpp b/core/unit_test/hip/TestHIP_SubView_c06.cpp new file mode 100644 index 00000000000..f0ac19f2c45 --- /dev/null +++ b/core/unit_test/hip/TestHIP_SubView_c06.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +TEST(TEST_CATEGORY, view_subview_2d_from_3d_randomaccess) { + TestViewSubview::test_2d_subview_3d< + TEST_EXECSPACE, Kokkos::MemoryTraits >(); +} + +} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c01.cpp b/core/unit_test/hip/TestHIP_SubView_c07.cpp similarity index 91% rename from core/unit_test/qthreads/TestQthreads_SubView_c01.cpp rename to core/unit_test/hip/TestHIP_SubView_c07.cpp index 1f560c24f68..b9743ab0a43 100644 --- a/core/unit_test/qthreads/TestQthreads_SubView_c01.cpp +++ b/core/unit_test/hip/TestHIP_SubView_c07.cpp @@ -42,14 +42,13 @@ //@HEADER */ -#include +#include +#include namespace Test { -TEST_F(qthreads, view_subview_1d_assign) { -#if 0 - TestViewSubview::test_1d_assign< Kokkos::Qthreads >(); -#endif +TEST(TEST_CATEGORY, view_subview_3d_from_5d_left) { + TestViewSubview::test_3d_subview_5d_left(); } } // namespace Test diff --git a/core/unit_test/hip/TestHIP_SubView_c08.cpp b/core/unit_test/hip/TestHIP_SubView_c08.cpp new file mode 100644 index 00000000000..30bef76d7fc --- /dev/null +++ b/core/unit_test/hip/TestHIP_SubView_c08.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +TEST(TEST_CATEGORY, view_subview_3d_from_5d_left_atomic) { + TestViewSubview::test_3d_subview_5d_left< + TEST_EXECSPACE, Kokkos::MemoryTraits >(); +} + +} // namespace Test diff --git a/core/unit_test/hip/TestHIP_SubView_c09.cpp b/core/unit_test/hip/TestHIP_SubView_c09.cpp new file mode 100644 index 00000000000..2e63849f35b --- /dev/null +++ b/core/unit_test/hip/TestHIP_SubView_c09.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +TEST(TEST_CATEGORY, view_subview_3d_from_5d_left_randomaccess) { + TestViewSubview::test_3d_subview_5d_left< + TEST_EXECSPACE, Kokkos::MemoryTraits >(); +} + +} // namespace Test diff --git a/core/unit_test/hip/TestHIP_SubView_c10.cpp b/core/unit_test/hip/TestHIP_SubView_c10.cpp new file mode 100644 index 00000000000..dae65a4d3b2 --- /dev/null +++ b/core/unit_test/hip/TestHIP_SubView_c10.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +TEST(TEST_CATEGORY, view_subview_3d_from_5d_right) { + TestViewSubview::test_3d_subview_5d_right(); +} + +} // namespace Test diff --git a/core/unit_test/hip/TestHIP_SubView_c11.cpp b/core/unit_test/hip/TestHIP_SubView_c11.cpp new file mode 100644 index 00000000000..570658bf550 --- /dev/null +++ b/core/unit_test/hip/TestHIP_SubView_c11.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +TEST(TEST_CATEGORY, view_subview_3d_from_5d_right_atomic) { + TestViewSubview::test_3d_subview_5d_right< + TEST_EXECSPACE, Kokkos::MemoryTraits >(); +} + +} // namespace Test diff --git a/core/unit_test/hip/TestHIP_SubView_c12.cpp b/core/unit_test/hip/TestHIP_SubView_c12.cpp new file mode 100644 index 00000000000..4e943c68f08 --- /dev/null +++ b/core/unit_test/hip/TestHIP_SubView_c12.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +namespace Test { + +TEST(TEST_CATEGORY, view_subview_3d_from_5d_right_randomaccess) { + TestViewSubview::test_3d_subview_5d_right< + TEST_EXECSPACE, Kokkos::MemoryTraits >(); +} + +} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c02.cpp b/core/unit_test/hip/TestHIP_SubView_c13.cpp similarity index 90% rename from core/unit_test/qthreads/TestQthreads_SubView_c02.cpp rename to core/unit_test/hip/TestHIP_SubView_c13.cpp index e4065d22b27..52a065f2bd5 100644 --- a/core/unit_test/qthreads/TestQthreads_SubView_c02.cpp +++ b/core/unit_test/hip/TestHIP_SubView_c13.cpp @@ -42,14 +42,13 @@ //@HEADER */ -#include +#include +#include namespace Test { -TEST_F(qthreads, view_subview_1d_assign_atomic) { -#if 0 - TestViewSubview::test_1d_assign< Kokkos::Qthreads, Kokkos::MemoryTraits >(); -#endif +TEST(TEST_CATEGORY, view_test_unmanaged_subview_reset) { + TestViewSubview::test_unmanaged_subview_reset(); } } // namespace Test diff --git a/core/unit_test/hip/TestHIP_ViewAPI_c.cpp b/core/unit_test/hip/TestHIP_ViewAPI_c.cpp new file mode 100644 index 00000000000..4dc72a07868 --- /dev/null +++ b/core/unit_test/hip/TestHIP_ViewAPI_c.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_ViewAPI_d.cpp b/core/unit_test/hip/TestHIP_ViewAPI_d.cpp new file mode 100644 index 00000000000..c96d39feeb7 --- /dev/null +++ b/core/unit_test/hip/TestHIP_ViewAPI_d.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_ViewMapping_a.cpp b/core/unit_test/hip/TestHIP_ViewMapping_a.cpp new file mode 100644 index 00000000000..146f06fc6e9 --- /dev/null +++ b/core/unit_test/hip/TestHIP_ViewMapping_a.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_ViewMapping_b.cpp b/core/unit_test/hip/TestHIP_ViewMapping_b.cpp new file mode 100644 index 00000000000..00f931ce9b5 --- /dev/null +++ b/core/unit_test/hip/TestHIP_ViewMapping_b.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_ViewMapping_subview.cpp b/core/unit_test/hip/TestHIP_ViewMapping_subview.cpp new file mode 100644 index 00000000000..f0e76e0400d --- /dev/null +++ b/core/unit_test/hip/TestHIP_ViewMapping_subview.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_ViewOfClass.cpp b/core/unit_test/hip/TestHIP_ViewOfClass.cpp new file mode 100644 index 00000000000..c673ffb354c --- /dev/null +++ b/core/unit_test/hip/TestHIP_ViewOfClass.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_ViewResize.cpp b/core/unit_test/hip/TestHIP_ViewResize.cpp new file mode 100644 index 00000000000..35e3125915a --- /dev/null +++ b/core/unit_test/hip/TestHIP_ViewResize.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/hip/TestHIP_View_64bit.cpp b/core/unit_test/hip/TestHIP_View_64bit.cpp new file mode 100644 index 00000000000..fed256043fc --- /dev/null +++ b/core/unit_test/hip/TestHIP_View_64bit.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/containers/unit_tests/hpx/TestHPX_BitSet.cpp b/core/unit_test/hpx/TestHPX_Concepts.cpp similarity index 98% rename from containers/unit_tests/hpx/TestHPX_BitSet.cpp rename to core/unit_test/hpx/TestHPX_Concepts.cpp index 179cfcc7b48..197a99a1e49 100644 --- a/containers/unit_tests/hpx/TestHPX_BitSet.cpp +++ b/core/unit_test/hpx/TestHPX_Concepts.cpp @@ -1,4 +1,3 @@ - /* //@HEADER // ************************************************************************ @@ -44,4 +43,4 @@ */ #include -#include +#include diff --git a/containers/unit_tests/hpx/TestHPX_DualView.cpp b/core/unit_test/hpx/TestHPX_RangePolicyRequire.cpp similarity index 98% rename from containers/unit_tests/hpx/TestHPX_DualView.cpp rename to core/unit_test/hpx/TestHPX_RangePolicyRequire.cpp index 368cdde95c6..c76750e8fde 100644 --- a/containers/unit_tests/hpx/TestHPX_DualView.cpp +++ b/core/unit_test/hpx/TestHPX_RangePolicyRequire.cpp @@ -44,4 +44,4 @@ */ #include -#include +#include diff --git a/core/unit_test/hpx/TestHPX_TeamScratch.cpp b/core/unit_test/hpx/TestHPX_TeamScratch.cpp index 14cbfb8f417..232bed509bc 100644 --- a/core/unit_test/hpx/TestHPX_TeamScratch.cpp +++ b/core/unit_test/hpx/TestHPX_TeamScratch.cpp @@ -58,7 +58,6 @@ TEST(TEST_CATEGORY, team_scratch_request) { } #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) TEST(TEST_CATEGORY, team_lambda_shared_request) { TestLambdaSharedTeam >(); @@ -66,7 +65,6 @@ TEST(TEST_CATEGORY, team_lambda_shared_request) { Kokkos::Schedule >(); } #endif -#endif TEST(TEST_CATEGORY, shmem_size) { TestShmemSize(); } diff --git a/core/unit_test/incremental/Test01_execspace.hpp b/core/unit_test/incremental/Test01_execspace.hpp new file mode 100644 index 00000000000..9f118bfb49f --- /dev/null +++ b/core/unit_test/incremental/Test01_execspace.hpp @@ -0,0 +1,111 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// @Kokkos_Feature_Level_Required:1 + +#include +#include +#include +#include +#include + +namespace Test { + +// Unit test for Execution Space +// Test1 - testing for memory_space, execution_space, scratch space and +// array_layout of an execution space +// Test2 - Test if the is_execution_space evaluation is working correctly + +template +struct TestIncrExecSpaceTypedef { + void testit() { + const bool passed = + (!std::is_same::value) && + std::is_same::value && + !std::is_same::value && + !std::is_same::value; + static_assert(passed == true, + "The memory and execution spaces are defined"); + } +}; + +template +struct TestIncrExecSpace { + void testit() { + typedef typename ExecSpace::device_type device_type; + typedef typename device_type::memory_space memory_space; + typedef typename device_type::execution_space execution_space; + + const bool passed = + std::is_same>::value; + + static_assert(passed == true, + "Checking if the is_execution_space is evaluated correctly"); + + ExecSpace().print_configuration(std::cout); + ExecSpace().fence(); + + auto concurrency = ExecSpace().concurrency(); + ASSERT_TRUE(concurrency > 0); + + int in_parallel = ExecSpace::in_parallel(); + ASSERT_FALSE(in_parallel); + + const char* name = ExecSpace::name(); + std::cout << name << std::endl; + } +}; + +TEST(TEST_CATEGORY, IncrTest_01_execspace_typedef) { + TestIncrExecSpaceTypedef test; + test.testit(); +} + +TEST(TEST_CATEGORY, IncrTest_01_execspace) { + ASSERT_TRUE(Kokkos::is_execution_space::value); + ASSERT_FALSE(Kokkos::is_execution_space< + TestIncrExecSpaceTypedef>::value); +} +} // namespace Test diff --git a/core/unit_test/incremental/Test02_atomic_host.hpp b/core/unit_test/incremental/Test02_atomic_host.hpp new file mode 100644 index 00000000000..4617ede99e6 --- /dev/null +++ b/core/unit_test/incremental/Test02_atomic_host.hpp @@ -0,0 +1,97 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// @Kokkos_Feature_Level_Required:2 +// Unit test for atomic exchange, atomic add and atomic sub. +// Atomic exchange test : we interchange value1 with value2 and check for +// correctness. Atomic add test : we add value2 to value1 and check for +// correctness. Atomic sub test : we subtract value2 from value1 and check for +// correctmess. + +#include +#include + +using value_type = double; + +namespace Test { + +struct TestIncrAtomic { + value_type value1 = 1.5, value2 = 0.5; + + void testExchange() { + value_type ret_value = Kokkos::atomic_exchange(&value1, value2); + + ASSERT_EQ(value1, 0.5); + ASSERT_EQ(ret_value, 1.5); + } + + void testAdd() { + Kokkos::atomic_add(&value1, value2); + + ASSERT_EQ(value1, 2.0); + } + + void testSub() { + Kokkos::atomic_sub(&value1, value2); + + ASSERT_EQ(value1, 1.0); + } +}; + +TEST(TEST_CATEGORY, IncrTest_01_AtomicExchange) { + TestIncrAtomic test; + test.testExchange(); +} + +TEST(TEST_CATEGORY, IncrTest_02_AtomicAdd) { + TestIncrAtomic test; + test.testAdd(); +} + +TEST(TEST_CATEGORY, IncrTest_02_AtomicSub) { + TestIncrAtomic test; + test.testSub(); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test03a_MemorySpace_malloc.hpp b/core/unit_test/incremental/Test03a_MemorySpace_malloc.hpp new file mode 100644 index 00000000000..da808be2191 --- /dev/null +++ b/core/unit_test/incremental/Test03a_MemorySpace_malloc.hpp @@ -0,0 +1,80 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +/// @Kokkos_Feature_Level_Required:3 +// Unit Test for Kokkos malloc. +// Allocate memory to a pointer and check if the allocation has not returned a +// null pointer. + +namespace Test { + +using value_type = double; +const int num_elements = 10; + +template +struct TestIncrMemorySpace_malloc { + using memory_space = typename ExecSpace::memory_space; + + void test_malloc() { + // Allocate memory + auto *data = static_cast(Kokkos::kokkos_malloc( + "data", num_elements * sizeof(value_type))); + + // Check if the allocated memory has not returned a NULL + ASSERT_NE(data, nullptr); + + // Free the allocated memory + Kokkos::kokkos_free(data); + } +}; + +TEST(TEST_CATEGORY, IncrTest_03a_memspace_malloc) { + TestIncrMemorySpace_malloc test; + test.test_malloc(); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test03b_MemorySpace_free.hpp b/core/unit_test/incremental/Test03b_MemorySpace_free.hpp new file mode 100644 index 00000000000..f7ee76ec9de --- /dev/null +++ b/core/unit_test/incremental/Test03b_MemorySpace_free.hpp @@ -0,0 +1,83 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// @Kokkos_Feature_Level_Required:3 +// Unit test for Kokkos free. +// We constantly allocate and free the memory. +// If the kokkos_free does not free the allocated memory, +// we will exceed the available space. + +#include +#include + +namespace Test { + +using value_type = double; + +// Allocate M number of value_type elements N number of times. +const int N = 100000; +const int M = 100000; + +template +struct TestIncrMemorySpace_free { + using memory_space = typename ExecSpace::memory_space; + + void test_free() { + for (int i = 0; i < N; ++i) { + auto *data = static_cast( + Kokkos::kokkos_malloc("data", M * sizeof(value_type))); + + ASSERT_NE(data, nullptr); + + Kokkos::kokkos_free(data); + } + } +}; + +TEST(TEST_CATEGORY, IncrTest_03b_memspace_free) { + TestIncrMemorySpace_free test; + test.test_free(); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test04_ParallelFor_RangePolicy.hpp b/core/unit_test/incremental/Test04_ParallelFor_RangePolicy.hpp new file mode 100644 index 00000000000..5e50b51dd1e --- /dev/null +++ b/core/unit_test/incremental/Test04_ParallelFor_RangePolicy.hpp @@ -0,0 +1,169 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +/// @Kokkos_Feature_Level_Required:4 +// parallel-for unit test. +// In this test, different elements of an array are updated by different +// threads. + +namespace Test { + +using value_type = double; +int num_elements = 10; +const value_type value = 0.5; + +struct ParallelForFunctor { + value_type *_data; + + ParallelForFunctor(value_type *data) : _data(data) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { _data[i] = (i + 1) * value; } +}; + +template +struct TestParallel_For { + // Memory space type for Device and Host data + using d_memspace_type = typename ExecSpace::memory_space; + using h_memspace_type = Kokkos::HostSpace; + + value_type *deviceData, *hostData; + + // Check if the array values are updated correctly. + void correctness_check(value_type *data) { + for (int i = 0; i < num_elements; ++i) { + ASSERT_EQ((i + 1) * value, data[i]) + << "Values in index " << i << " are incorrect"; + } + } + + // Routine to allocate memory in a specific memory space. + template + value_type *allocate_mem(int N) { + return (static_cast( + Kokkos::kokkos_malloc("deviceData", N * sizeof(value_type)))); + } + + // Routine to free the memory from a specific memory space. + template + void free_mem(value_type *data) { + Kokkos::kokkos_free(data); + } + + void init() { + // Allocate memory on Device space. + deviceData = allocate_mem(num_elements); + ASSERT_NE(deviceData, nullptr); + + // Allocate memory on Host space. + hostData = allocate_mem(num_elements); + ASSERT_NE(hostData, nullptr); + } + + void check_correctness_and_cleanup() { + // Copy the data back to Host memory space + Kokkos::Impl::DeepCopy( + hostData, deviceData, num_elements * sizeof(value_type)); + + // Check if all data has been update correctly + correctness_check(hostData); + + // free the allocated memory + free_mem(deviceData); + free_mem(hostData); + } + + // A simple parallel for test with functors + void simple_test() { + // Allocates memory for num_elements number of value_type elements in the + // host and device memory spaces. + init(); + + // parallel-for functor called for num_elements number of iterations. + Kokkos::parallel_for("parallel_for", num_elements, + ParallelForFunctor(deviceData)); + + Kokkos::fence(); + // Checks if parallel_for gave the correct results. + // Frees the allocated memory in init(). + check_correctness_and_cleanup(); + } + + // A parallel_for test with user defined RangePolicy + void range_policy() { + // Allocates memory for num_elements number of value_type elements in the + // host and device memory spaces. + init(); + + // Creates a range policy that uses dynamic scheduling. + typedef Kokkos::RangePolicy > + range_policy_t; + + // parallel-for functor with range-policy from 0 to num_elements iterations. + Kokkos::parallel_for("RangePolicy_ParallelFor", + range_policy_t(0, num_elements), + ParallelForFunctor(deviceData)); + + // Checks if parallel_for gave the correct results. + // Free the allocated memory in init(). + check_correctness_and_cleanup(); + } +}; + +TEST(TEST_CATEGORY, IncrTest_04_simple_parallelFor) { + if (std::is_same::value) { + TestParallel_For test; + test.simple_test(); + } +} + +TEST(TEST_CATEGORY, IncrTest_04_RangePolicy_parallelFor) { + TestParallel_For test; + test.range_policy(); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test05_ParallelReduce_RangePolicy.hpp b/core/unit_test/incremental/Test05_ParallelReduce_RangePolicy.hpp new file mode 100644 index 00000000000..7c147e47cc3 --- /dev/null +++ b/core/unit_test/incremental/Test05_ParallelReduce_RangePolicy.hpp @@ -0,0 +1,154 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +/// @Kokkos_Feature_Level_Required:5 +// Unit test for reduction of native data type. +// Assigns an index based value to elements of an array. +// Performs an reduction over the addition operation. + +namespace Test { + +using value_type = double; +const double value = 0.5; +const int num_elements = 10; + +struct ReduceFunctor { + value_type *_data; + + ReduceFunctor(value_type *data) : _data(data) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i, double &UpdateSum) const { + _data[i] = (i + 1) * value; + UpdateSum += _data[i]; + } +}; + +template +struct TestReduction { + // Memory space type for Device and Host data + using d_memspace_type = typename ExecSpace::memory_space; + using h_memspace_type = Kokkos::HostSpace; + + value_type *deviceData, *hostData; + value_type sum = 0.0; + + // compare and equal + void check_correctness() { + int sum_local = 0; + for (int i = 0; i < num_elements; ++i) sum_local += (i + 1); + + ASSERT_EQ(sum, sum_local * value) + << "The reduced value does not match the expected answer"; + } + + // Routine to allocate memory in a specific memory space. + template + value_type *allocate_mem(int N) { + return (static_cast( + Kokkos::kokkos_malloc("deviceData", N * sizeof(value_type)))); + } + + // Routine to free the memory from a specific memory space. + template + void free_mem(value_type *data) { + Kokkos::kokkos_free(data); + } + + // Free the allocated memory + void free_mem() { + Kokkos::kokkos_free(deviceData); + Kokkos::kokkos_free(hostData); + } + + // Allocate Memory for both device and host memory spaces + void init() { + // Allocate memory on Device space. + deviceData = allocate_mem(num_elements); + ASSERT_NE(deviceData, nullptr); + + // Allocate memory on Host space. + hostData = allocate_mem(num_elements); + ASSERT_NE(hostData, nullptr); + + // Initialize the sum value to zero. + sum = 0.0; + } + + void check_correctness_and_cleanup() { + // Check if reduction has produced correct results + check_correctness(); + + // free the allocated memory + free_mem(deviceData); + free_mem(hostData); + } + + void sum_reduction() { + // Allocates memory for num_elements number of value_type elements in the + // host and device memory spaces. + init(); + + // Creates a range policy that uses dynamic schedule. + typedef Kokkos::RangePolicy > + range_policy; + + // parallel_reduce call with range policy over num_elements number of + // iterations + Kokkos::parallel_reduce("Reduction", range_policy(0, num_elements), + ReduceFunctor(deviceData), sum); + + check_correctness_and_cleanup(); + } +}; + +TEST(TEST_CATEGORY, IncrTest_05_reduction) { + TestReduction test; + test.sum_reduction(); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test06_ParallelFor_MDRangePolicy.hpp b/core/unit_test/incremental/Test06_ParallelFor_MDRangePolicy.hpp new file mode 100644 index 00000000000..d9e5a37b558 --- /dev/null +++ b/core/unit_test/incremental/Test06_ParallelFor_MDRangePolicy.hpp @@ -0,0 +1,263 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +/// @Kokkos_Feature_Level_Required:6 +// Unit Test for MDRangePolicy without Views uptil 4 ranks. +// For each of the MDRangePolicy test from 2-to-4 ranks, we create an equivalent +// dimensional array implemented in 1D. In each of these arrays we update the +// elements as a product of iterator indexes and a constant. At the end, we +// check for correctness. + +namespace Test04 { + +using value_type = double; +const int N = 10; +const int M = 10; +const value_type delta = 0.5; + +struct MDFunctor { + value_type *_data; + + MDFunctor(value_type *data) : _data(data) {} + + // 2D + KOKKOS_INLINE_FUNCTION + void operator()(const int i, const int j) const { + _data[i * M + j] = i * j * delta; + } + + // 3D + KOKKOS_INLINE_FUNCTION + void operator()(const int i, const int j, const int k) const { + _data[i * M * N + j * M + k] = i * j * k * delta; + } + + // 4D + KOKKOS_INLINE_FUNCTION + void operator()(const int i, const int j, const int k, const int l) const { + _data[i * M * N * M + j * M * N + k * M + l] = i * j * k * l * delta; + } +}; + +template +struct TestMDRangePolicy { + // Memory space type for Device and Host data + using d_memspace_type = typename ExecSpace::memory_space; + using h_memspace_type = Kokkos::HostSpace; + + // Index Type for the iterator + using int_index = Kokkos::IndexType; + + // An MDRangePolicy for 2 nested loops + using MDPolicyType_2D = typename Kokkos::Experimental::MDRangePolicy< + ExecSpace, Kokkos::Experimental::Rank<2>, int_index>; + + // An MDRangePolicy for 3 nested loops + using MDPolicyType_3D = typename Kokkos::Experimental::MDRangePolicy< + ExecSpace, Kokkos::Experimental::Rank<3>, int_index>; + + // An MDRangePolicy for 4 nested loops + using MDPolicyType_4D = typename Kokkos::Experimental::MDRangePolicy< + ExecSpace, Kokkos::Experimental::Rank<4>, int_index>; + + // Device and Host Data structure pointer + value_type *deviceData, *hostData; + + // Routine to allocate memory in a specific memory space. + template + value_type *allocate_mem(int N_) { + return (static_cast( + Kokkos::kokkos_malloc("Data", N_ * sizeof(value_type)))); + } + + // Routine to free the memory from a specific memory space. + template + void free_mem(value_type *data) { + Kokkos::kokkos_free(data); + } + + // compare and equal + void compare_equal_2D() { + for (int i = 0; i < N; ++i) + for (int j = 0; j < M; ++j) ASSERT_EQ(hostData[i * M + j], i * j * delta); + } + + // compare and equal + void compare_equal_3D() { + for (int i = 0; i < N; ++i) + for (int j = 0; j < M; ++j) + for (int k = 0; k < N; ++k) + ASSERT_EQ(hostData[i * M * N + j * M + k], i * j * k * delta); + } + + // compare and equal + void compare_equal_4D() { + for (int i = 0; i < N; ++i) + for (int j = 0; j < M; ++j) + for (int k = 0; k < N; ++k) + for (int l = 0; l < M; ++l) + ASSERT_EQ(hostData[i * M * N * M + j * M * N + k * M + l], + i * j * k * l * delta); + } + + // A 2-D MDRangePolicy + void mdRange2D() { + MDPolicyType_2D mdPolicy_2D({0, 0}, {N, M}); + + // Total number of elements + int num_elements = N * M; + + // Allocate Memory for both device and host memory spaces + // Data[M*N] + deviceData = allocate_mem(num_elements); + ASSERT_NE(deviceData, nullptr); + + hostData = allocate_mem(num_elements); + ASSERT_NE(hostData, nullptr); + + // parallel_for call + MDFunctor Functor_2D(deviceData); + Kokkos::parallel_for("MDRange2D", mdPolicy_2D, Functor_2D); + + // Copy the data back to Host memory space + Kokkos::Impl::DeepCopy( + hostData, deviceData, num_elements * sizeof(value_type)); + + // Check if all data has been update correctly + compare_equal_2D(); + + // free the allocated memory + free_mem(deviceData); + free_mem(hostData); + } + + // A 3-D MDRangePolicy + void mdRange3D() { + MDPolicyType_3D mdPolicy_3D({0, 0, 0}, {N, M, N}); + + // Total number of elements + int num_elements = N * M * N; + + // Allocate Memory for both device and host memory spaces + // Data[M*N*N] + deviceData = allocate_mem(num_elements); + ASSERT_NE(deviceData, nullptr); + + hostData = allocate_mem(num_elements); + ASSERT_NE(hostData, nullptr); + + // parallel_for call + MDFunctor Functor_3D(deviceData); + Kokkos::parallel_for("MDRange3D", mdPolicy_3D, Functor_3D); + + // Copy the data back to Host memory space + Kokkos::Impl::DeepCopy( + hostData, deviceData, num_elements * sizeof(value_type)); + + // Check if all data has been update correctly + compare_equal_3D(); + + // free the allocated memory + free_mem(deviceData); + free_mem(hostData); + } + + // A 4-D MDRangePolicy + void mdRange4D() { + MDPolicyType_4D mdPolicy_4D({0, 0, 0, 0}, {N, M, N, M}); + + // Total number of elements + int num_elements = N * M * N * M; + + // Allocate Memory for both device and host memory spaces + // Data[M*N*N*M] + deviceData = allocate_mem(num_elements); + ASSERT_NE(deviceData, nullptr); + + hostData = allocate_mem(num_elements); + ASSERT_NE(hostData, nullptr); + + // parallel_for call + MDFunctor Functor_4D(deviceData); + Kokkos::parallel_for("MDRange4D", mdPolicy_4D, Functor_4D); + + // Copy the data back to Host memory space + Kokkos::Impl::DeepCopy( + hostData, deviceData, num_elements * sizeof(value_type)); + + // Check if all data has been update correctly + compare_equal_4D(); + + // free the allocated memory + free_mem(deviceData); + free_mem(hostData); + } +}; + +} // namespace Test04 + +namespace Test { + +// 2D MDRangePolicy +TEST(TEST_CATEGORY, IncrTest_06_mdrange2D) { + Test04::TestMDRangePolicy test; + test.mdRange2D(); +} + +// 3D MDRangePolicy +TEST(TEST_CATEGORY, IncrTest_06_mdrange3D) { + Test04::TestMDRangePolicy test; + test.mdRange3D(); +} + +// 4D MDRangePolicy +TEST(TEST_CATEGORY, IncrTest_06_mdrange4D) { + Test04::TestMDRangePolicy test; + test.mdRange4D(); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test08_deep_copy.hpp b/core/unit_test/incremental/Test08_deep_copy.hpp new file mode 100644 index 00000000000..5166f5a9f0d --- /dev/null +++ b/core/unit_test/incremental/Test08_deep_copy.hpp @@ -0,0 +1,207 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +/// @Kokkos_Feature_Level_Required:8 +// Unit Test for MDRangePolicy without Views uptil 4 ranks. +// For each of the MDRangePolicy test from 2-to-4 ranks, we create an equivalent +// dimensional view. In each of these views we update the +// elements as a product of iterator indexes and a constant inside a +// parallel_for lambda. At the end, we check for correctness. + +namespace Test05 { + +using value_type = double; +const int N = 10; +const int M = 10; + +template +struct TestMDRangePolicy { + // 2D View + using View_2D = typename Kokkos::View; + using Host_View_2D = typename View_2D::HostMirror; + Host_View_2D hostDataView_2D; + + // 3D View + using View_3D = typename Kokkos::View; + using Host_View_3D = typename View_3D::HostMirror; + Host_View_3D hostDataView_3D; + + // 4D View + using View_4D = typename Kokkos::View; + using Host_View_4D = typename View_4D::HostMirror; + Host_View_4D hostDataView_4D; + + // Memory space type for Device and Host data + using d_memspace_type = typename ExecSpace::memory_space; + using h_memspace_type = Kokkos::HostSpace; + + // Index Type for the iterator + using int_index = Kokkos::IndexType; + + // An MDRangePolicy for 2 nested loops + using MDPolicyType_2D = typename Kokkos::Experimental::MDRangePolicy< + ExecSpace, Kokkos::Experimental::Rank<2>, int_index>; + + // An MDRangePolicy for 3 nested loops + using MDPolicyType_3D = typename Kokkos::Experimental::MDRangePolicy< + ExecSpace, Kokkos::Experimental::Rank<3>, int_index>; + + // An MDRangePolicy for 4 nested loops + using MDPolicyType_4D = typename Kokkos::Experimental::MDRangePolicy< + ExecSpace, Kokkos::Experimental::Rank<4>, int_index>; + + // compare and equal + void compare_equal_2D() { + for (int i = 0; i < N; ++i) + for (int j = 0; j < M; ++j) ASSERT_EQ(hostDataView_2D(i, j), i * M + j); + } + + // compare and equal + void compare_equal_3D() { + for (int i = 0; i < N; ++i) + for (int j = 0; j < M; ++j) + for (int k = 0; k < N; ++k) + ASSERT_EQ(hostDataView_3D(i, j, k), i * M * N + j * N + k); + } + + // compare and equal + void compare_equal_4D() { + for (int i = 0; i < N; ++i) + for (int j = 0; j < M; ++j) + for (int k = 0; k < N; ++k) + for (int l = 0; l < M; ++l) + ASSERT_EQ(hostDataView_4D(i, j, k, l), + i * M * N * M + j * N * M + k * M + l); + } + + // A 2-D MDRangePolicy + void mdRange2D() { + View_2D deviceDataView_2D("deviceData_2D", N, M); + hostDataView_2D = create_mirror_view(deviceDataView_2D); + + MDPolicyType_2D mdPolicy_2D({0, 0}, {N, M}); + + Kokkos::parallel_for( + mdPolicy_2D, KOKKOS_LAMBDA(const int i, const int j) { + deviceDataView_2D(i, j) = i * M + j; + }); + + // Copy data back to host view. + Kokkos::deep_copy(hostDataView_2D, deviceDataView_2D); + + // Check if all data has been update correctly + compare_equal_2D(); + } + + // A 3-D MDRangePolicy + void mdRange3D() { + View_3D deviceDataView_3D("deviceData_3D", N, M, N); + hostDataView_3D = create_mirror_view(deviceDataView_3D); + + MDPolicyType_3D mdPolicy_3D({0, 0, 0}, {N, M, N}); + + Kokkos::parallel_for( + mdPolicy_3D, KOKKOS_LAMBDA(const int i, const int j, const int k) { + deviceDataView_3D(i, j, k) = i * M * N + j * N + k; + }); + + // Copy data back to host view. + Kokkos::deep_copy(hostDataView_3D, deviceDataView_3D); + + // Check if all data has been update correctly + compare_equal_3D(); + } + + // A 4-D MDRangePolicy + void mdRange4D() { + View_4D deviceDataView_4D("deviceData_4D", N, M, N, M); + hostDataView_4D = create_mirror_view(deviceDataView_4D); + + MDPolicyType_4D mdPolicy_4D({0, 0, 0, 0}, {N, M, N, M}); + + Kokkos::parallel_for( + mdPolicy_4D, + KOKKOS_LAMBDA(const int i, const int j, const int k, const int l) { + deviceDataView_4D(i, j, k, l) = i * M * N * M + j * N * M + k * M + l; + }); + + Kokkos::deep_copy(hostDataView_4D, deviceDataView_4D); + + // Check if all data has been update correctly + compare_equal_4D(); + } +}; + +} // namespace Test05 + +namespace Test { + +// 2D MDRangePolicy +TEST(TEST_CATEGORY, IncrTest_08_deep_copy_2D) { + { + Test05::TestMDRangePolicy test; + test.mdRange2D(); + } +} + +// 3D MDRangePolicy +TEST(TEST_CATEGORY, IncrTest_08_deep_copy_3D) { + { + Test05::TestMDRangePolicy test; + test.mdRange3D(); + } +} + +// 4D MDRangePolicy +TEST(TEST_CATEGORY, IncrTest_08_deep_copy_4D) { + { + Test05::TestMDRangePolicy test; + test.mdRange4D(); + } +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test10_HierarchicalBasics.hpp b/core/unit_test/incremental/Test10_HierarchicalBasics.hpp new file mode 100644 index 00000000000..a5e478c30fa --- /dev/null +++ b/core/unit_test/incremental/Test10_HierarchicalBasics.hpp @@ -0,0 +1,101 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// @Kokkos_Feature_Level_Required:10 +// Unit test for hierarchical parallelism +// Create concurrent work hierarchically and verify if +// contributions of paticipating processing units corresponds to expected value + +#include +#include + +namespace Test { + +template +struct HierarchicalBasics { + using policy_t = Kokkos::TeamPolicy; + using team_t = typename policy_t::member_type; + + void run(const int nP, int nT) { + if (nT > ExecSpace::concurrency()) nT = ExecSpace::concurrency(); + + policy_t pol(nP, nT); + + ASSERT_EQ(pol.league_size(), nP); + ASSERT_LE(pol.team_size(), nT); + nT = pol.team_size(); + + Kokkos::View v("Array_A", nP, nT); + Kokkos::parallel_for( + "Teams", pol, KOKKOS_LAMBDA(const team_t &team) { + const int tR = team.team_rank(); + const int tS = team.team_size(); + const int lR = team.league_rank(); + const int lS = team.league_size(); + if (lR < lS) { + v(lR, tR) = lR * tS + tR; + } else { + v(lR, tR) = 100000; + } + }); + Kokkos::fence(); + auto h_v = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v); + + size_t check = 0; + size_t ref = nP * nT; + for (int i = 0; i < nP; ++i) + for (int j = 0; j < nT; ++j) check += h_v(i, j); + + ASSERT_EQ(check, ref * (ref - 1) / 2); + } +}; + +TEST(TEST_CATEGORY, IncrTest_10_Hierarchical_Basics) { + HierarchicalBasics test; + test.run(1, 4); + test.run(8, 16); + test.run(11, 13); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test11a_ParallelFor_TeamThreadRange.hpp b/core/unit_test/incremental/Test11a_ParallelFor_TeamThreadRange.hpp new file mode 100644 index 00000000000..e36b8f9d3f9 --- /dev/null +++ b/core/unit_test/incremental/Test11a_ParallelFor_TeamThreadRange.hpp @@ -0,0 +1,99 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// @Kokkos_Feature_Level_Required:11 +// Unit test for hierarchical parallelism +// Create concurrent work hierarchically and verify if +// contributions of paticipating processing units corresponds to expected value + +#include +#include + +namespace Test { + +template +struct Hierarchical_ForLoop_A { + void run(const int pN, const int sX, const int sY) { + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + typedef Kokkos::View viewDataType; + viewDataType v("Matrix", sX, sY); + + Kokkos::parallel_for( + "Team", team_policy(pN, Kokkos::AUTO), + KOKKOS_LAMBDA(const member_type &team) { + const int n = team.league_rank(); + const int ls = team.league_size(); + + const int startDim1 = n * (int)(sX / ls); + const int modDim1 = n == ls - 1 ? sX % ls : 0; + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, v.extent(1)), [&](const int m) { + for (int i = startDim1; + i < (startDim1 + (int)(sX / ls) + modDim1); ++i) + v(i, m) = i * v.extent(1) + m; + }); + }); + + Kokkos::fence(); + auto v_H = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v); + + int check = 0; + const int s = sY * sX; + for (int i = 0; i < sX; ++i) + for (int j = 0; j < sY; ++j) check += v_H(i, j); + ASSERT_EQ(check, s * (s - 1) / 2); + } +}; + +TEST(TEST_CATEGORY, IncrTest_11a_Hierarchical_ForLoop) { + Hierarchical_ForLoop_A test; + test.run(4, 5, 200); + test.run(4, 7, 19); + test.run(14, 277, 321); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test11b_ParallelFor_TeamVectorRange.hpp b/core/unit_test/incremental/Test11b_ParallelFor_TeamVectorRange.hpp new file mode 100644 index 00000000000..7e4bb2aa3b7 --- /dev/null +++ b/core/unit_test/incremental/Test11b_ParallelFor_TeamVectorRange.hpp @@ -0,0 +1,99 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// @Kokkos_Feature_Level_Required:11 +// Unit test for hierarchical parallelism +// Create concurrent work hierarchically and verify if +// contributions of paticipating processing units corresponds to expected value + +#include +#include + +namespace Test { + +template +struct Hierarchical_ForLoop_B { + void run(const int pN, const int sX, const int sY) { + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + typedef Kokkos::View viewDataType; + viewDataType v("Matrix", sX, sY); + + Kokkos::parallel_for( + "Team", team_policy(pN, Kokkos::AUTO), + KOKKOS_LAMBDA(const member_type &team) { + const int n = team.league_rank(); + const int ls = team.league_size(); + + const int startDim1 = n * (int)(sX / ls); + const int modDim1 = n == ls - 1 ? sX % ls : 0; + + Kokkos::parallel_for( + Kokkos::TeamVectorRange(team, v.extent(1)), [&](const int m) { + for (int i = startDim1; + i < (startDim1 + (int)(sX / ls) + modDim1); ++i) + v(i, m) = i * v.extent(1) + m; + }); + }); + + Kokkos::fence(); + auto v_H = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v); + + int check = 0; + const int s = sY * sX; + for (int i = 0; i < sX; ++i) + for (int j = 0; j < sY; ++j) check += v_H(i, j); + ASSERT_EQ(check, s * (s - 1) / 2); + } +}; + +TEST(TEST_CATEGORY, IncrTest_11b_Hierarchical_ForLoop) { + Hierarchical_ForLoop_B test; + test.run(1, 6, 400); + test.run(6, 7, 19); + test.run(12, 277, 321); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test11c_ParallelFor_ThreadVectorRange.hpp b/core/unit_test/incremental/Test11c_ParallelFor_ThreadVectorRange.hpp new file mode 100644 index 00000000000..c6998a5781c --- /dev/null +++ b/core/unit_test/incremental/Test11c_ParallelFor_ThreadVectorRange.hpp @@ -0,0 +1,105 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// @Kokkos_Feature_Level_Required:11 +// Unit test for hierarchical parallelism +// Create concurrent work hierarchically and verify if +// contributions of paticipating processing units corresponds to expected value + +#include +#include + +namespace Test { + +template +struct Hierarchical_ForLoop_C { + void run(const int pN, const int sX, const int sY, const int sZ) { + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + typedef Kokkos::View viewDataType; + viewDataType v("Matrix", sX, sY, sZ); + + Kokkos::parallel_for( + "Team", team_policy(pN, Kokkos::AUTO), + KOKKOS_LAMBDA(const member_type &team) { + int n = team.league_rank(); + int ls = team.league_size(); + + int startDim1 = n * (int)(sX / ls); + int modDim1 = n == ls - 1 ? sX % ls : 0; + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, v.extent(1)), [&](const int m) { + Kokkos::parallel_for( + Kokkos::ThreadVectorRange(team, v.extent(2)), + [&](const int k) { + for (int i = startDim1; + i < (startDim1 + (int)(sX / ls) + modDim1); ++i) + v(i, m, k) = + i * v.extent(1) * v.extent(2) + m * v.extent(2) + k; + }); + }); + }); + + Kokkos::fence(); + auto v_H = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v); + + size_t check = 0; + const size_t s = sX * sY * sZ; + for (int i = 0; i < sX; ++i) + for (int j = 0; j < sY; ++j) + for (int k = 0; k < sZ; ++k) check += v_H(i, j, k); + ASSERT_EQ(check, s * (s - 1) / 2); + } +}; + +TEST(TEST_CATEGORY, IncrTest_11c_Hierarchical_ForLoop) { + Hierarchical_ForLoop_C test; + test.run(4, 16, 16, 16); + test.run(8, 12, 333, 16); + test.run(12, 277, 321, 345); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test12a_ThreadScratch.hpp b/core/unit_test/incremental/Test12a_ThreadScratch.hpp new file mode 100644 index 00000000000..8eb1c243c5d --- /dev/null +++ b/core/unit_test/incremental/Test12a_ThreadScratch.hpp @@ -0,0 +1,125 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// @Kokkos_Feature_Level_Required:12 +// Unit test for hierarchical parallelism +// Create concurrent work hierarchically and verify if +// contributions of paticipating processing units corresponds to expected value +// Use a scratch pad memory for each team +#include +#include + +namespace Test { + +template +struct ThreadScratch { + using policy_t = Kokkos::TeamPolicy; + using team_t = typename Kokkos::TeamPolicy::member_type; + using data_t = Kokkos::View; + + using scratch_t = Kokkos::View >; + + int sX, sY; + data_t v; + KOKKOS_FUNCTION + void operator()(const team_t &team) const { + // Allocate and use scratch pad memory + scratch_t v_S(team.thread_scratch(1), sY); + int n = team.league_rank(); + + for (int i = 0; i < sY; ++i) v_S(i) = 0; + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, sX), [&](const int m) { + Kokkos::parallel_for( + Kokkos::ThreadVectorRange(team, sY), + [&](const int k) { v_S(k) += sX * sY * n + sY * m + k; }); + }); + + team.team_barrier(); + + for (int i = 0; i < sY; ++i) { + v(n, team.team_rank()) += v_S(i); + } + } + + void run(const int pN, const int sX_, const int sY_) { + sX = sX_; + sY = sY_; + + int scratchSize = scratch_t::shmem_size(sY); + // So this works with deprecated code enabled: + policy_t policy = policy_t(pN, Kokkos::AUTO) + .set_scratch_size(1, Kokkos::PerThread(scratchSize)); + + int max_team_size = policy.team_size_max(*this, Kokkos::ParallelForTag()); + v = data_t("Matrix", pN, max_team_size); + + Kokkos::parallel_for( + "Test12a_ThreadScratch", + policy_t(pN, max_team_size) + .set_scratch_size(1, Kokkos::PerThread(scratchSize)), + *this); + + Kokkos::fence(); + auto v_H = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v); + + size_t check = 0; + const size_t s = pN * sX * sY; + for (int n = 0; n < pN; ++n) + for (int m = 0; m < max_team_size; ++m) { + check += v_H(n, m); + } + ASSERT_EQ(s * (s - 1) / 2, check); + } +}; + +TEST(TEST_CATEGORY, IncrTest_12a_ThreadScratch) { + ThreadScratch test; + test.run(1, 55, 9); + test.run(2, 4, 22); + test.run(14, 277, 321); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test12b_TeamScratch.hpp b/core/unit_test/incremental/Test12b_TeamScratch.hpp new file mode 100644 index 00000000000..169491af3fb --- /dev/null +++ b/core/unit_test/incremental/Test12b_TeamScratch.hpp @@ -0,0 +1,113 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// @Kokkos_Feature_Level_Required:12 +// Unit test for hierarchical parallelism +// Create concurrent work hierarchically and verify if +// contributions of paticipating processing units corresponds to expected value +// Use a scratch pad memory for each team +#include +#include + +namespace Test { + +template +struct TeamScratch { + void run(const int pN, const int sX, const int sY) { + using policy_t = Kokkos::TeamPolicy; + using team_t = typename Kokkos::TeamPolicy::member_type; + using data_t = Kokkos::View; + data_t v("Matrix", pN, sX); + + using scratch_t = Kokkos::View >; + int scratchSize = scratch_t::shmem_size(sX, sY); + + Kokkos::parallel_for( + "Team", + policy_t(pN, Kokkos::AUTO) + .set_scratch_size(1, Kokkos::PerTeam(scratchSize)), + KOKKOS_LAMBDA(const team_t &team) { + // Allocate and use scratch pad memory + scratch_t v_S(team.team_scratch(1), sX, sY); + int n = team.league_rank(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, sX), [&](const int m) { + Kokkos::parallel_for( + Kokkos::ThreadVectorRange(team, sY), [&](const int k) { + v_S(m, k) = v_S.extent(0) * v_S.extent(1) * n + + v_S.extent(1) * m + k; + }); + }); + + team.team_barrier(); + + // Sum up contributions and reduce by one dimension + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, sX), + [&](const int m) { + for (int i = 0; i < sY; ++i) + v(n, m) += v_S(m, i); + }); + }); + + Kokkos::fence(); + auto v_H = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v); + + size_t check = 0; + const size_t s = pN * sX * sY; + for (int n = 0; n < pN; ++n) + for (int m = 0; m < sX; ++m) check += v_H(n, m); + ASSERT_EQ(check, s * (s - 1) / 2); + } +}; + +TEST(TEST_CATEGORY, IncrTest_12b_TeamScratch) { + TeamScratch test; + test.run(1, 4, 4); + test.run(4, 7, 10); + test.run(14, 277, 321); +} + +} // namespace Test diff --git a/core/unit_test/incremental/Test13a_ParallelRed_TeamThreadRange.hpp b/core/unit_test/incremental/Test13a_ParallelRed_TeamThreadRange.hpp new file mode 100644 index 00000000000..b5467da9215 --- /dev/null +++ b/core/unit_test/incremental/Test13a_ParallelRed_TeamThreadRange.hpp @@ -0,0 +1,105 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// @Kokkos_Feature_Level_Required:13 +// Unit test for hierarchical parallelism +// Create concurrent work hierarchically and verify if +// sum of created processing units corresponds to expected value + +#include +#include + +// Degrees of concurrency per nesting level + +using SCALAR_TYPE = int; + +namespace Test { + +template +struct Hierarchical_Red_A { + void run(const int pN, const int sX) { + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + typedef Kokkos::View viewDataType; + viewDataType v("Vector", pN); + + Kokkos::parallel_for( + "Team", team_policy(pN, Kokkos::AUTO), + KOKKOS_LAMBDA(const member_type &team) { + const int n = team.league_rank(); + SCALAR_TYPE out = 0; + + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, sX), + [=](const int i, SCALAR_TYPE &tmp) { + tmp += n * v.extent(0) + i; + }, + out); + + Kokkos::single(Kokkos::PerTeam(team), [&]() { v(n) += out; }); + }); + + Kokkos::fence(); + auto v_H = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v); + + SCALAR_TYPE check = 0; + SCALAR_TYPE ref = 0; + for (int i = 0; i < pN; ++i) { + check += v_H(i); + ref += + (sX + i * pN) * (sX + i * pN - 1) / 2 - ((i * pN) * (i * pN - 1) / 2); + } + ASSERT_EQ(check, ref); + } +}; + +TEST(TEST_CATEGORY, IncrTest_13a_Hierarchical_Red) { + Hierarchical_Red_A test; + test.run(4, 16); + test.run(2, 39); + test.run(39, 3); +} + +} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads.hpp b/core/unit_test/incremental/Test13b_ParallelRed_TeamVectorRange.hpp similarity index 57% rename from core/unit_test/qthreads/TestQthreads.hpp rename to core/unit_test/incremental/Test13b_ParallelRed_TeamVectorRange.hpp index c45433d77d4..ada295591e6 100644 --- a/core/unit_test/qthreads/TestQthreads.hpp +++ b/core/unit_test/incremental/Test13b_ParallelRed_TeamVectorRange.hpp @@ -42,68 +42,61 @@ //@HEADER */ -#ifndef KOKKOS_TEST_QTHREADS_HPP -#define KOKKOS_TEST_QTHREADS_HPP +// @Kokkos_Feature_Level_Required:13 +// Unit test for hierarchical parallelism +// Create concurrent work hierarchically and verify if +// sum of created processing units corresponds to expected value #include +#include -#include +using SCALAR_TYPE = int; -#ifdef KOKKOS_LAMBDA -#undef KOKKOS_LAMBDA -#endif -#define KOKKOS_LAMBDA [=] +namespace Test { -#include +template +struct Hierarchical_Red_B { + void run(const int pN, const int sX) { + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include + typedef Kokkos::View viewDataType; + viewDataType v("Vector", pN); -namespace Test { + Kokkos::parallel_for( + "Team", team_policy(pN, Kokkos::AUTO), + KOKKOS_LAMBDA(const member_type &team) { + const int n = team.league_rank(); + SCALAR_TYPE out = 0; -class qthreads : public ::testing::Test { - protected: - static void SetUpTestCase() { - const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); - const unsigned cores_per_numa = - Kokkos::hwloc::get_available_cores_per_numa(); - const unsigned threads_per_core = - Kokkos::hwloc::get_available_threads_per_core(); + Kokkos::parallel_reduce( + Kokkos::TeamVectorRange(team, sX), + [=](const int i, SCALAR_TYPE &tmp) { + tmp += n * v.extent(0) + i; + }, + out); - const unsigned threads_count = - std::max(1u, numa_count) * - std::max(2u, (cores_per_numa * threads_per_core) / 2); + Kokkos::single(Kokkos::PerTeam(team), [&]() { v(n) += out; }); + }); - Kokkos::Qthreads::initialize(threads_count); - Kokkos::print_configuration(std::cout, true); + Kokkos::fence(); + auto v_H = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v); - srand(10231); + SCALAR_TYPE check = 0; + SCALAR_TYPE ref = 0; + for (int i = 0; i < pN; ++i) { + check += v_H(i); + ref += ((sX + i * pN) * (sX + i * pN - 1) - (i * pN * (i * pN - 1))) / 2; + } + ASSERT_EQ(check, ref); } - - static void TearDownTestCase() { Kokkos::Qthreads::finalize(); } }; -} // namespace Test +TEST(TEST_CATEGORY, IncrTest_13b_Hierarchical_Red) { + Hierarchical_Red_B test; + test.run(4, 16); + test.run(2, 39); + test.run(39, 3); +} -#endif +} // namespace Test diff --git a/core/unit_test/incremental/Test13c_ParallelRed_ThreadVectorRange.hpp b/core/unit_test/incremental/Test13c_ParallelRed_ThreadVectorRange.hpp new file mode 100644 index 00000000000..7df940c58de --- /dev/null +++ b/core/unit_test/incremental/Test13c_ParallelRed_ThreadVectorRange.hpp @@ -0,0 +1,112 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +// @Kokkos_Feature_Level_Required:13 +// Unit test for hierarchical parallelism +// Create concurrent work hierarchically and verify if +// sum of created processing units corresponds to expected value + +#include +#include + +using SCALAR_TYPE = int; + +namespace Test { + +template +struct Hierarchical_Red_C { + void run(const int pN, const int sX, const int sY) { + typedef Kokkos::TeamPolicy team_policy; + typedef typename Kokkos::TeamPolicy::member_type member_type; + + typedef Kokkos::View viewDataType; + viewDataType v("Vector", pN); + + Kokkos::parallel_for( + "Team", team_policy(pN, Kokkos::AUTO), + KOKKOS_LAMBDA(const member_type &team) { + int n = team.league_rank(); + SCALAR_TYPE out = 0; + + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, sX), + [=](const int i, SCALAR_TYPE &tmp) { + SCALAR_TYPE out_inner = 0; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, sY), + [=](const int k, int &tmp_inner) { + tmp_inner += n * sX * v.extent(0) + sX * i + k; + }, + out_inner); + + Kokkos::single(Kokkos::PerThread(team), + [&]() { tmp += out_inner; }); + }, + out); + + Kokkos::single(Kokkos::PerTeam(team), [&]() { v(n) += out; }); + }); + + Kokkos::fence(); + auto v_H = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), v); + + SCALAR_TYPE check = 0; + SCALAR_TYPE ref = 0; + for (int i = 0; i < pN; ++i) { + check += v_H(i); + for (int j = 0; j < sX; ++j) + for (int k = 0; k < sY; ++k) ref += i * sX * pN + sX * j + k; + } + ASSERT_EQ(check, ref); + } +}; + +TEST(TEST_CATEGORY, IncrTest_13c_Hierarchical_Red) { + Hierarchical_Red_C test; + test.run(1, 4, 8); + test.run(2, 39, 12); + test.run(39, 3, 235); +} + +} // namespace Test diff --git a/containers/unit_tests/openmp/TestOpenMP_BitSet.cpp b/core/unit_test/openmp/TestOpenMP_Concepts.cpp similarity index 98% rename from containers/unit_tests/openmp/TestOpenMP_BitSet.cpp rename to core/unit_test/openmp/TestOpenMP_Concepts.cpp index db110171594..5480af5aaf9 100644 --- a/containers/unit_tests/openmp/TestOpenMP_BitSet.cpp +++ b/core/unit_test/openmp/TestOpenMP_Concepts.cpp @@ -1,4 +1,3 @@ - /* //@HEADER // ************************************************************************ @@ -44,4 +43,4 @@ */ #include -#include +#include diff --git a/core/unit_test/openmp/TestOpenMP_Other.cpp b/core/unit_test/openmp/TestOpenMP_Other.cpp index 6e7463a33fd..7043432517b 100644 --- a/core/unit_test/openmp/TestOpenMP_Other.cpp +++ b/core/unit_test/openmp/TestOpenMP_Other.cpp @@ -63,7 +63,7 @@ TEST(openmp, partition_master) { Mutex mtx; int errors = 0; - auto master = [&errors, &mtx](int partition_id, int num_partitions) { + auto master = [&errors, &mtx](int /*partition_id*/, int /*num_partitions*/) { #ifdef KOKKOS_ENABLE_DEPRECATED_CODE const int pool_size = Kokkos::OpenMP::thread_pool_size(); diff --git a/containers/unit_tests/openmp/TestOpenMP_DualView.cpp b/core/unit_test/openmp/TestOpenMP_RangePolicyRequire.cpp similarity index 98% rename from containers/unit_tests/openmp/TestOpenMP_DualView.cpp rename to core/unit_test/openmp/TestOpenMP_RangePolicyRequire.cpp index ed4eb23a8bd..284a65a21ee 100644 --- a/containers/unit_tests/openmp/TestOpenMP_DualView.cpp +++ b/core/unit_test/openmp/TestOpenMP_RangePolicyRequire.cpp @@ -44,4 +44,4 @@ */ #include -#include +#include diff --git a/core/unit_test/openmp/TestOpenMP_Team.cpp b/core/unit_test/openmp/TestOpenMP_Team.cpp index c389ebac373..2409307387d 100644 --- a/core/unit_test/openmp/TestOpenMP_Team.cpp +++ b/core/unit_test/openmp/TestOpenMP_Team.cpp @@ -80,25 +80,25 @@ TEST(TEST_CATEGORY, team_reduce) { } TEST(TEST_CATEGORY, team_broadcast) { - TestTeamBroadcast >::test_teambroadcast(0); - TestTeamBroadcast >::test_teambroadcast(0); + TestTeamBroadcast, + long>::test_teambroadcast(0, 1); + TestTeamBroadcast, + long>::test_teambroadcast(0, 1); - TestTeamBroadcast >::test_teambroadcast(2); - TestTeamBroadcast >::test_teambroadcast(2); + TestTeamBroadcast, + long>::test_teambroadcast(2, 1); + TestTeamBroadcast, + long>::test_teambroadcast(2, 1); - TestTeamBroadcast >::test_teambroadcast(16); - TestTeamBroadcast >::test_teambroadcast(16); + TestTeamBroadcast, + long>::test_teambroadcast(16, 1); + TestTeamBroadcast, + long>::test_teambroadcast(16, 1); - TestTeamBroadcast >:: - test_teambroadcast(1000); - TestTeamBroadcast >:: - test_teambroadcast(1000); + TestTeamBroadcast, + long>::test_teambroadcast(1000, 1); + TestTeamBroadcast, + long>::test_teambroadcast(1000, 1); } } // namespace Test diff --git a/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp b/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp index 5c283a62264..b42bdf754f6 100644 --- a/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp +++ b/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp @@ -58,7 +58,6 @@ TEST(TEST_CATEGORY, team_scratch_request) { } #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) TEST(TEST_CATEGORY, team_lambda_shared_request) { TestLambdaSharedTeam >(); @@ -67,7 +66,6 @@ TEST(TEST_CATEGORY, team_lambda_shared_request) { } TEST(TEST_CATEGORY, scratch_align) { TestScratchAlignment(); } #endif -#endif TEST(TEST_CATEGORY, shmem_size) { TestShmemSize(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_Concepts.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_Concepts.cpp new file mode 100644 index 00000000000..8896339f996 --- /dev/null +++ b/core/unit_test/openmptarget/TestOpenMPTarget_Concepts.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 3.0 +// Copyright (2020) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_Crs.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_Crs.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_FunctorAnalysis.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_FunctorAnalysis.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_LocalDeepCopy.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_LocalDeepCopy.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_RangePolicyRequire.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_RangePolicyRequire.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_Reductions_DeviceView.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_Reductions_DeviceView.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SharedAlloc.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SharedAlloc.cpp index aa0d9cfe27d..6d5645547bc 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SharedAlloc.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SharedAlloc.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, impl_shared_alloc) { +TEST(TEST_CATEGORY, impl_shared_alloc) { test_shared_alloc(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_a.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_a.cpp index 6f6255af87a..4be6218e23d 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_a.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_a.cpp @@ -47,51 +47,51 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_auto_1d_left) { +TEST(TEST_CATEGORY, view_subview_auto_1d_left) { TestViewSubview::test_auto_1d(); } -TEST_F(TEST_CATEGORY, view_subview_auto_1d_right) { +TEST(TEST_CATEGORY, view_subview_auto_1d_right) { TestViewSubview::test_auto_1d(); } -TEST_F(TEST_CATEGORY, view_subview_auto_1d_stride) { +TEST(TEST_CATEGORY, view_subview_auto_1d_stride) { TestViewSubview::test_auto_1d(); } -TEST_F(TEST_CATEGORY, view_subview_assign_strided) { +TEST(TEST_CATEGORY, view_subview_assign_strided) { TestViewSubview::test_1d_strided_assignment(); } -TEST_F(TEST_CATEGORY, view_subview_left_0) { +TEST(TEST_CATEGORY, view_subview_left_0) { TestViewSubview::test_left_0(); } -TEST_F(TEST_CATEGORY, view_subview_left_1) { +TEST(TEST_CATEGORY, view_subview_left_1) { TestViewSubview::test_left_1(); } -TEST_F(TEST_CATEGORY, view_subview_left_2) { +TEST(TEST_CATEGORY, view_subview_left_2) { TestViewSubview::test_left_2(); } -TEST_F(TEST_CATEGORY, view_subview_left_3) { +TEST(TEST_CATEGORY, view_subview_left_3) { TestViewSubview::test_left_3(); } -TEST_F(TEST_CATEGORY, view_subview_right_0) { +TEST(TEST_CATEGORY, view_subview_right_0) { TestViewSubview::test_right_0(); } -TEST_F(TEST_CATEGORY, view_subview_right_1) { +TEST(TEST_CATEGORY, view_subview_right_1) { TestViewSubview::test_right_1(); } -TEST_F(TEST_CATEGORY, view_subview_right_3) { +TEST(TEST_CATEGORY, view_subview_right_3) { TestViewSubview::test_right_3(); } -TEST_F(TEST_CATEGORY, view_static_tests) { +TEST(TEST_CATEGORY, view_static_tests) { TestViewSubview::TestSubviewStaticSizes()(); TestViewSubview::TestSubviewStaticSizes(); TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits >(); @@ -55,7 +55,7 @@ TEST_F(TEST_CATEGORY, view_subview_layoutleft_to_layoutleft) { TEST_EXECSPACE, Kokkos::MemoryTraits >(); } -TEST_F(TEST_CATEGORY, view_subview_layoutright_to_layoutright) { +TEST(TEST_CATEGORY, view_subview_layoutright_to_layoutright) { TestViewSubview::test_layoutright_to_layoutright(); TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits >(); diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c01.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c01.cpp index 953c383bf98..3361efcc16c 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c01.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c01.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_1d_assign) { +TEST(TEST_CATEGORY, view_subview_1d_assign) { TestViewSubview::test_1d_assign(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c02.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c02.cpp index 3a6245e2581..c4887cf3dbc 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c02.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c02.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_1d_assign_atomic) { +TEST(TEST_CATEGORY, view_subview_1d_assign_atomic) { TestViewSubview::test_1d_assign >(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c03.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c03.cpp index e9e912dd0e7..b172b0c6198 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c03.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c03.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_1d_assign_randomaccess) { +TEST(TEST_CATEGORY, view_subview_1d_assign_randomaccess) { TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits >(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c04.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c04.cpp index 0c5b9c8c019..faf0ef7a412 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c04.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c04.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_2d_from_3d) { +TEST(TEST_CATEGORY, view_subview_2d_from_3d) { TestViewSubview::test_2d_subview_3d(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c05.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c05.cpp index 0e11c637e61..a3fb95fbefb 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c05.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c05.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(openmptarget, view_subview_2d_from_3d_atomic) { +TEST(openmptarget, view_subview_2d_from_3d_atomic) { TestViewSubview::test_2d_subview_3d >(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c06.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c06.cpp index e9a30a1835a..056b392cd5d 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c06.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c06.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_2d_from_3d_randomaccess) { +TEST(TEST_CATEGORY, view_subview_2d_from_3d_randomaccess) { TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits >(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c07.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c07.cpp index 7d230d7e854..033bce86335 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c07.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c07.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_3d_from_5d_left) { +TEST(TEST_CATEGORY, view_subview_3d_from_5d_left) { TestViewSubview::test_3d_subview_5d_left(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c08.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c08.cpp index e0b1f17a986..58ef4236089 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c08.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c08.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_3d_from_5d_left_atomic) { +TEST(TEST_CATEGORY, view_subview_3d_from_5d_left_atomic) { TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits >(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c09.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c09.cpp index 0b5d94b5274..6e6c2af8e94 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c09.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c09.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_3d_from_5d_left_randomaccess) { +TEST(TEST_CATEGORY, view_subview_3d_from_5d_left_randomaccess) { TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits >(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c10.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c10.cpp index da2605da03a..2ad95d38cf7 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c10.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c10.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_3d_from_5d_right) { +TEST(TEST_CATEGORY, view_subview_3d_from_5d_right) { TestViewSubview::test_3d_subview_5d_right(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c11.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c11.cpp index 3aa927190c7..da8684d21ce 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c11.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c11.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_3d_from_5d_right_atomic) { +TEST(TEST_CATEGORY, view_subview_3d_from_5d_right_atomic) { TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits >(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c12.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c12.cpp index dde0db7dd84..dde7d517620 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c12.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c12.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, view_subview_3d_from_5d_right_randomaccess) { +TEST(TEST_CATEGORY, view_subview_3d_from_5d_right_randomaccess) { TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits >(); } diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c13.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c13.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_Team.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_Team.cpp index f2d063c25f9..74b74d784ca 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_Team.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_Team.cpp @@ -47,7 +47,7 @@ namespace Test { -TEST_F(TEST_CATEGORY, team_for) { +TEST(TEST_CATEGORY, team_for) { TestTeamPolicy >::test_for( 0); TestTeamPolicy >::test_for( @@ -64,7 +64,7 @@ TEST_F(TEST_CATEGORY, team_for) { 1000); } -TEST_F(TEST_CATEGORY, team_reduce) { +TEST(TEST_CATEGORY, team_reduce) { TestTeamPolicy >::test_reduce(0); TestTeamPolicy >(0); TestScanTeam >(0); TestScanTeam >(10); @@ -56,7 +56,7 @@ TEST_F(TEST_CATEGORY, team_scan) { TestScanTeam >(10000); } -TEST_F(TEST_CATEGORY, team_long_reduce) { +TEST(TEST_CATEGORY, team_long_reduce) { TestReduceTeam >(0); TestReduceTeam >(0); TestReduceTeam >(3); @@ -67,7 +67,7 @@ TEST_F(TEST_CATEGORY, team_long_reduce) { 100000); } -TEST_F(TEST_CATEGORY, team_double_reduce) { +TEST(TEST_CATEGORY, team_double_reduce) { TestReduceTeam >(0); TestReduceTeam >(0); TestReduceTeam >(3); diff --git a/core/unit_test/openmptarget/TestOpenMPTarget_TeamScratch.cpp b/core/unit_test/openmptarget/TestOpenMPTarget_TeamScratch.cpp index b0a75432d76..729e3591878 100644 --- a/core/unit_test/openmptarget/TestOpenMPTarget_TeamScratch.cpp +++ b/core/unit_test/openmptarget/TestOpenMPTarget_TeamScratch.cpp @@ -47,18 +47,17 @@ namespace Test { -TEST_F(TEST_CATEGORY, team_shared_request) { +TEST(TEST_CATEGORY, team_shared_request) { TestSharedTeam >(); TestSharedTeam >(); } -TEST_F(TEST_CATEGORY, team_scratch_request) { +TEST(TEST_CATEGORY, team_scratch_request) { TestScratchTeam >(); TestScratchTeam >(); } #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) TEST_F(TEST_CATEGORY, team_lambda_shared_request) { TestLambdaSharedTeam >(); @@ -66,11 +65,10 @@ TEST_F(TEST_CATEGORY, team_lambda_shared_request) { Kokkos::Schedule >(); } #endif -#endif -TEST_F(TEST_CATEGORY, shmem_size) { TestShmemSize(); } +TEST(TEST_CATEGORY, shmem_size) { TestShmemSize(); } -TEST_F(TEST_CATEGORY, multi_level_scratch) { +TEST(TEST_CATEGORY, multi_level_scratch) { TestMultiLevelScratchTeam >(); TestMultiLevelScratchTeam - -namespace Test { - -TEST_F(qthreads, atomics) { -#if 0 - const int loop_count = 1e4; - - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Qthreads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Qthreads >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Qthreads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Qthreads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Qthreads >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Qthreads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Qthreads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Qthreads >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Qthreads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Qthreads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Qthreads >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Qthreads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Qthreads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Qthreads >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Qthreads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Qthreads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Qthreads >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Qthreads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Qthreads >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Qthreads >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Qthreads >( 100, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Qthreads >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Qthreads >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex, Kokkos::Qthreads >( 100, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Qthreads >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Qthreads >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Qthreads >( 100, 3 ) ) ); -#endif -} - -TEST_F(qthreads, atomic_operations) { -#if 0 - const int start = 1; // Avoid zero for division. - const int end = 11; - - for ( int i = start; i < end; ++i ) - { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 4 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 4 ) ) ); - } -#endif -} - -TEST_F(qthreads, atomic_views_integral) { -#if 0 - const long length = 1000000; - - { - // Integral Types. - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 8 ) ) ); - } -#endif -} - -TEST_F(qthreads, atomic_views_nonintegral) { -#if 0 - const long length = 1000000; - - { - // Non-Integral Types. - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 4 ) ) ); - } -#endif -} - -TEST_F(qthreads, atomic_view_api) { -#if 0 - TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Qthreads >(); -#endif -} - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_Complex.cpp b/core/unit_test/qthreads/TestQthreads_Complex.cpp deleted file mode 100644 index 09e1ca3e24c..00000000000 --- a/core/unit_test/qthreads/TestQthreads_Complex.cpp +++ /dev/null @@ -1,2 +0,0 @@ -#include -#include diff --git a/core/unit_test/qthreads/TestQthreads_Other.cpp b/core/unit_test/qthreads/TestQthreads_Other.cpp deleted file mode 100644 index 7d546166131..00000000000 --- a/core/unit_test/qthreads/TestQthreads_Other.cpp +++ /dev/null @@ -1,196 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Test { - -TEST_F(qthreads, init) { ; } - -TEST_F(qthreads, md_range) { -#if 0 - TestMDRange_2D< Kokkos::Qthreads >::test_for2( 100, 100 ); - TestMDRange_3D< Kokkos::Qthreads >::test_for3( 100, 100, 100 ); -#endif -} - -TEST_F(qthreads, policy_construction) { -#if 0 - TestRangePolicyConstruction< Kokkos::Qthreads >(); - TestTeamPolicyConstruction< Kokkos::Qthreads >(); -#endif -} - -TEST_F(qthreads, range_tag) { -#if 0 - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 0 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 0 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 0 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 0 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 0 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 0 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_dynamic_policy( 0 ); - - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 2 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 2 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 2 ); - - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 3 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 3 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 3 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_dynamic_policy( 3 ); - - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 1000 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 1000 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 1000 ); - - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 1001 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 1001 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_scan( 1001 ); - TestRange< Kokkos::Qthreads, Kokkos::Schedule >::test_dynamic_policy( 1000 ); -#endif -} - -//---------------------------------------------------------------------------- - -TEST_F(qthreads, compiler_macros) { -#if 0 - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Qthreads >() ) ); -#endif -} - -//---------------------------------------------------------------------------- - -TEST_F(qthreads, memory_pool) { -#if 0 - -#endif -} - -//---------------------------------------------------------------------------- - -#if defined(KOKKOS_ENABLE_TASKDAG) - -TEST_F(qthreads, task_fib) { -#if 0 - const int N = 24 ; // 25 triggers tbd bug on Cuda/Pascal - for ( int i = 0; i < N; ++i ) { - TestTaskScheduler::TestFib< Kokkos::Qthreads >::run( i, ( i + 1 ) * ( i + 1 ) * 10000 ); - } -#endif -} - -TEST_F(qthreads, task_depend) { -#if 0 - for ( int i = 0; i < 25; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::Qthreads >::run( i ); - } -#endif -} - -TEST_F(qthreads, task_team) { -#if 0 - TestTaskScheduler::TestTaskTeam< Kokkos::Qthreads >::run( 1000 ); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::Qthreads >::run( 1000 ); // Put back after testing. -#endif -} - -#endif // #if defined( KOKKOS_ENABLE_TASKDAG ) - -//---------------------------------------------------------------------------- - -#if defined(KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS) - -TEST_F(qthreads, cxx11) { -#if 0 - if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Qthreads >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 1 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 2 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 3 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 4 ) ) ); - } -#endif -} - -#endif - -TEST_F(qthreads, tile_layout) { -#if 0 - TestTile::test< Kokkos::Qthreads, 1, 1 >( 1, 1 ); - TestTile::test< Kokkos::Qthreads, 1, 1 >( 2, 3 ); - TestTile::test< Kokkos::Qthreads, 1, 1 >( 9, 10 ); - - TestTile::test< Kokkos::Qthreads, 2, 2 >( 1, 1 ); - TestTile::test< Kokkos::Qthreads, 2, 2 >( 2, 3 ); - TestTile::test< Kokkos::Qthreads, 2, 2 >( 4, 4 ); - TestTile::test< Kokkos::Qthreads, 2, 2 >( 9, 9 ); - - TestTile::test< Kokkos::Qthreads, 2, 4 >( 9, 9 ); - TestTile::test< Kokkos::Qthreads, 4, 2 >( 9, 9 ); - - TestTile::test< Kokkos::Qthreads, 4, 4 >( 1, 1 ); - TestTile::test< Kokkos::Qthreads, 4, 4 >( 4, 4 ); - TestTile::test< Kokkos::Qthreads, 4, 4 >( 9, 9 ); - TestTile::test< Kokkos::Qthreads, 4, 4 >( 9, 11 ); - - TestTile::test< Kokkos::Qthreads, 8, 8 >( 1, 1 ); - TestTile::test< Kokkos::Qthreads, 8, 8 >( 4, 4 ); - TestTile::test< Kokkos::Qthreads, 8, 8 >( 9, 9 ); - TestTile::test< Kokkos::Qthreads, 8, 8 >( 9, 11 ); -#endif -} - -TEST_F(qthreads, dispatch) { -#if 0 - const int repeat = 100; - for ( int i = 0; i < repeat; ++i ) { - for ( int j = 0; j < repeat; ++j ) { - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Qthreads >( 0, j ) - , KOKKOS_LAMBDA( int ) {} ); - } - } -#endif -} - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_Reductions.cpp b/core/unit_test/qthreads/TestQthreads_Reductions.cpp deleted file mode 100644 index 9e60ad123f6..00000000000 --- a/core/unit_test/qthreads/TestQthreads_Reductions.cpp +++ /dev/null @@ -1,157 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Test { - -TEST_F(qthreads, long_reduce) { -#if 0 - TestReduce< long, Kokkos::Qthreads >( 0 ); - TestReduce< long, Kokkos::Qthreads >( 1000000 ); -#endif -} - -TEST_F(qthreads, double_reduce) { -#if 0 - TestReduce< double, Kokkos::Qthreads >( 0 ); - TestReduce< double, Kokkos::Qthreads >( 1000000 ); -#endif -} - -TEST_F(qthreads, reducers) { -#if 0 - TestReducers< int, Kokkos::Qthreads >::execute_integer(); - TestReducers< size_t, Kokkos::Qthreads >::execute_integer(); - TestReducers< double, Kokkos::Qthreads >::execute_float(); - TestReducers< Kokkos::complex, Kokkos::Qthreads>::execute_basic(); -#endif -} - -TEST_F(qthreads, long_reduce_dynamic) { -#if 0 - TestReduceDynamic< long, Kokkos::Qthreads >( 0 ); - TestReduceDynamic< long, Kokkos::Qthreads >( 1000000 ); -#endif -} - -TEST_F(qthreads, double_reduce_dynamic) { -#if 0 - TestReduceDynamic< double, Kokkos::Qthreads >( 0 ); - TestReduceDynamic< double, Kokkos::Qthreads >( 1000000 ); -#endif -} - -TEST_F(qthreads, long_reduce_dynamic_view) { -#if 0 - TestReduceDynamicView< long, Kokkos::Qthreads >( 0 ); - TestReduceDynamicView< long, Kokkos::Qthreads >( 1000000 ); -#endif -} - -TEST_F(qthreads, scan) { -#if 0 - TestScan< Kokkos::Qthreads >::test_range( 1, 1000 ); - TestScan< Kokkos::Qthreads >( 0 ); - TestScan< Kokkos::Qthreads >( 100000 ); - TestScan< Kokkos::Qthreads >( 10000000 ); - Kokkos::Qthreads().fence(); -#endif -} - -TEST_F(qthreads, scan_small) { -#if 0 - typedef TestScan< Kokkos::Qthreads, Kokkos::Impl::QthreadsExecUseScanSmall > TestScanFunctor; - - for ( int i = 0; i < 1000; ++i ) { - TestScanFunctor( 10 ); - TestScanFunctor( 10000 ); - } - TestScanFunctor( 1000000 ); - TestScanFunctor( 10000000 ); - - Kokkos::Qthreads().fence(); -#endif -} - -TEST_F(qthreads, team_scan) { -#if 0 - TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule >( 0 ); - TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule >( 0 ); - TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule >( 10 ); - TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule >( 10 ); - TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule >( 10000 ); - TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule >( 10000 ); -#endif -} - -TEST_F(qthreads, team_long_reduce) { -#if 0 - TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 0 ); - TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 0 ); - TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 3 ); - TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 3 ); - TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 100000 ); - TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule >( 100000 ); -#endif -} - -TEST_F(qthreads, team_double_reduce) { -#if 0 - TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 0 ); - TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 0 ); - TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 3 ); - TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 3 ); - TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 100000 ); - TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule >( 100000 ); -#endif -} - -TEST_F(qthreads, reduction_deduction) { -#if 0 - TestCXX11::test_reduction_deduction< Kokkos::Qthreads >(); -#endif -} - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c05.cpp b/core/unit_test/qthreads/TestQthreads_SubView_c05.cpp deleted file mode 100644 index 1e6b6736621..00000000000 --- a/core/unit_test/qthreads/TestQthreads_SubView_c05.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Test { - -TEST_F(qthreads, view_subview_2d_from_3d_atomic) { -#if 0 - TestViewSubview::test_2d_subview_3d< Kokkos::Qthreads, Kokkos::MemoryTraits >(); -#endif -} - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c06.cpp b/core/unit_test/qthreads/TestQthreads_SubView_c06.cpp deleted file mode 100644 index b2ef924f875..00000000000 --- a/core/unit_test/qthreads/TestQthreads_SubView_c06.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Test { - -TEST_F(qthreads, view_subview_2d_from_3d_randomaccess) { -#if 0 - TestViewSubview::test_2d_subview_3d< Kokkos::Qthreads, Kokkos::MemoryTraits >(); -#endif -} - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c08.cpp b/core/unit_test/qthreads/TestQthreads_SubView_c08.cpp deleted file mode 100644 index 0bc591c128f..00000000000 --- a/core/unit_test/qthreads/TestQthreads_SubView_c08.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Test { - -TEST_F(qthreads, view_subview_3d_from_5d_left_atomic) { -#if 0 - TestViewSubview::test_3d_subview_5d_left< Kokkos::Qthreads, Kokkos::MemoryTraits >(); -#endif -} - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c09.cpp b/core/unit_test/qthreads/TestQthreads_SubView_c09.cpp deleted file mode 100644 index 45d878fcea3..00000000000 --- a/core/unit_test/qthreads/TestQthreads_SubView_c09.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Test { - -TEST_F(qthreads, view_subview_3d_from_5d_left_randomaccess) { -#if 0 - TestViewSubview::test_3d_subview_5d_left< Kokkos::Qthreads, Kokkos::MemoryTraits >(); -#endif -} - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c11.cpp b/core/unit_test/qthreads/TestQthreads_SubView_c11.cpp deleted file mode 100644 index 6bfd19ad65a..00000000000 --- a/core/unit_test/qthreads/TestQthreads_SubView_c11.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Test { - -TEST_F(qthreads, view_subview_3d_from_5d_right_atomic) { -#if 0 - TestViewSubview::test_3d_subview_5d_right< Kokkos::Qthreads, Kokkos::MemoryTraits >(); -#endif -} - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c12.cpp b/core/unit_test/qthreads/TestQthreads_SubView_c12.cpp deleted file mode 100644 index 9b6896d3eeb..00000000000 --- a/core/unit_test/qthreads/TestQthreads_SubView_c12.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Test { - -TEST_F(qthreads, view_subview_3d_from_5d_right_randomaccess) { -#if 0 - TestViewSubview::test_3d_subview_5d_right< Kokkos::Qthreads, Kokkos::MemoryTraits >(); -#endif -} - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c13.cpp b/core/unit_test/qthreads/TestQthreads_SubView_c13.cpp deleted file mode 100644 index e03190d5146..00000000000 --- a/core/unit_test/qthreads/TestQthreads_SubView_c13.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Test { - -TEST_F(qthreads, view_test_unmanaged_subview_reset) { -#if 0 - TestViewSubview::test_unmanaged_subview_reset< TEST_EXECSPACE >(); -#endif -} - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_SubView_c_all.cpp b/core/unit_test/qthreads/TestQthreads_SubView_c_all.cpp deleted file mode 100644 index d8ea958a5d4..00000000000 --- a/core/unit_test/qthreads/TestQthreads_SubView_c_all.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include diff --git a/core/unit_test/qthreads/TestQthreads_Team.cpp b/core/unit_test/qthreads/TestQthreads_Team.cpp deleted file mode 100644 index 6a4d9323a71..00000000000 --- a/core/unit_test/qthreads/TestQthreads_Team.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Test { - -TEST_F(qthreads, team_tag) { -#if 0 - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 0 ); - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 0 ); - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 0 ); - - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 2 ); - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 2 ); - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 2 ); - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 2 ); - - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 1000 ); - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 1000 ); - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_for( 1000 ); - TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule >::test_reduce( 1000 ); -#endif -} - -TEST_F(qthreads, team_shared_request) { -#if 0 - TestSharedTeam< Kokkos::Qthreads, Kokkos::Schedule >(); - TestSharedTeam< Kokkos::Qthreads, Kokkos::Schedule >(); -#endif -} - -TEST_F(qthreads, team_scratch_request) { -#if 0 - TestScratchTeam< Kokkos::Qthreads, Kokkos::Schedule >(); - TestScratchTeam< Kokkos::Qthreads, Kokkos::Schedule >(); -#endif -} - -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -TEST_F(qthreads, team_lambda_shared_request) { -#if 0 - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Qthreads, Kokkos::Schedule >(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Qthreads, Kokkos::Schedule >(); -#endif -} -#endif - -TEST_F(qthreads, shmem_size) { -#if 0 - TestShmemSize< Kokkos::Qthreads >(); -#endif -} - -TEST_F(qthreads, multi_level_scratch) { -#if 0 - TestMultiLevelScratchTeam< Kokkos::Qthreads, Kokkos::Schedule >(); - TestMultiLevelScratchTeam< Kokkos::Qthreads, Kokkos::Schedule >(); -#endif -} - -TEST_F(qthreads, team_vector) { -#if 0 - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 0 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 1 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 2 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 3 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 4 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 5 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 6 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 7 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 8 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 9 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 10 ) ) ); -#endif -} - -#ifdef KOKKOS_COMPILER_GNU -#if (KOKKOS_COMPILER_GNU == 472) -#define SKIP_TEST -#endif -#endif - -#ifndef SKIP_TEST -TEST_F(qthreads, triple_nested_parallelism) { -#if 0 - TestTripleNestedReduce< double, Kokkos::Qthreads >( 8192, 2048, 32, 32 ); - TestTripleNestedReduce< double, Kokkos::Qthreads >( 8192, 2048, 32, 16 ); - TestTripleNestedReduce< double, Kokkos::Qthreads >( 8192, 2048, 16, 16 ); -#endif -} -#endif - -} // namespace Test diff --git a/core/unit_test/qthreads/TestQthreads_ViewAPI_a.cpp b/core/unit_test/qthreads/TestQthreads_ViewAPI_a.cpp deleted file mode 100644 index fad6473d204..00000000000 --- a/core/unit_test/qthreads/TestQthreads_ViewAPI_a.cpp +++ /dev/null @@ -1,56 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 3.0 -// Copyright (2020) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Christian R. Trott (crtrott@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include - -namespace Test { - -TEST_F(qthreads, impl_view_mapping_a) { -#if 0 - test_view_mapping< Kokkos::Qthreads >(); - test_view_mapping_operator< Kokkos::Qthreads >(); -#endif -} - -} // namespace Test diff --git a/containers/unit_tests/serial/TestSerial_BitSet.cpp b/core/unit_test/serial/TestSerial_Concepts.cpp similarity index 98% rename from containers/unit_tests/serial/TestSerial_BitSet.cpp rename to core/unit_test/serial/TestSerial_Concepts.cpp index 322e88a6af9..aa2c1577ce2 100644 --- a/containers/unit_tests/serial/TestSerial_BitSet.cpp +++ b/core/unit_test/serial/TestSerial_Concepts.cpp @@ -1,4 +1,3 @@ - /* //@HEADER // ************************************************************************ @@ -44,4 +43,4 @@ */ #include -#include +#include diff --git a/containers/unit_tests/serial/TestSerial_DualView.cpp b/core/unit_test/serial/TestSerial_RangePolicyRequire.cpp similarity index 98% rename from containers/unit_tests/serial/TestSerial_DualView.cpp rename to core/unit_test/serial/TestSerial_RangePolicyRequire.cpp index c1646ed13ba..dcc77054d6d 100644 --- a/containers/unit_tests/serial/TestSerial_DualView.cpp +++ b/core/unit_test/serial/TestSerial_RangePolicyRequire.cpp @@ -44,4 +44,4 @@ */ #include -#include +#include diff --git a/core/unit_test/serial/TestSerial_Team.cpp b/core/unit_test/serial/TestSerial_Team.cpp index e85ea86f3f6..1ee69468105 100644 --- a/core/unit_test/serial/TestSerial_Team.cpp +++ b/core/unit_test/serial/TestSerial_Team.cpp @@ -79,26 +79,87 @@ TEST(TEST_CATEGORY, team_reduce) { Kokkos::Schedule >::test_reduce(1000); } -TEST(TEST_CATEGORY, team_broadcast) { - TestTeamBroadcast >::test_teambroadcast(0); - TestTeamBroadcast >::test_teambroadcast(0); - - TestTeamBroadcast >::test_teambroadcast(2); - TestTeamBroadcast >::test_teambroadcast(2); - - TestTeamBroadcast >::test_teambroadcast(16); - TestTeamBroadcast >::test_teambroadcast(16); - - TestTeamBroadcast >:: - test_teambroadcast(1000); - TestTeamBroadcast >:: - test_teambroadcast(1000); +TEST(TEST_CATEGORY, team_broadcast_long) { + TestTeamBroadcast, + long>::test_teambroadcast(0, 1); + TestTeamBroadcast, + long>::test_teambroadcast(0, 1); + + TestTeamBroadcast, + long>::test_teambroadcast(2, 1); + TestTeamBroadcast, + long>::test_teambroadcast(2, 1); + + TestTeamBroadcast, + long>::test_teambroadcast(16, 1); + TestTeamBroadcast, + long>::test_teambroadcast(16, 1); + + TestTeamBroadcast, + long>::test_teambroadcast(1000, 1); + TestTeamBroadcast, + long>::test_teambroadcast(1000, 1); +} + +TEST(TEST_CATEGORY, team_broadcast_char) { + TestTeamBroadcast, + unsigned char>::test_teambroadcast(0, 1); + TestTeamBroadcast, + unsigned char>::test_teambroadcast(0, 1); + + TestTeamBroadcast, + unsigned char>::test_teambroadcast(2, 1); + TestTeamBroadcast, + unsigned char>::test_teambroadcast(2, 1); + + TestTeamBroadcast, + unsigned char>::test_teambroadcast(16, 1); + TestTeamBroadcast, + unsigned char>::test_teambroadcast(16, 1); +} + +TEST(TEST_CATEGORY, team_broadcast_float) { + TestTeamBroadcast, + float>::test_teambroadcast(0, 1.3); + TestTeamBroadcast, + float>::test_teambroadcast(0, 1.3); + + TestTeamBroadcast, + float>::test_teambroadcast(2, 1.3); + TestTeamBroadcast, + float>::test_teambroadcast(2, 1.3); + + TestTeamBroadcast, + float>::test_teambroadcast(16, 1.3); + TestTeamBroadcast, + float>::test_teambroadcast(16, 1.3); + + TestTeamBroadcast, + float>::test_teambroadcast(1000, 1.3); + TestTeamBroadcast, + float>::test_teambroadcast(1000, 1.3); +} + +TEST(TEST_CATEGORY, team_broadcast_double) { + TestTeamBroadcast, + double>::test_teambroadcast(0, 1.3); + TestTeamBroadcast, + double>::test_teambroadcast(0, 1.3); + + TestTeamBroadcast, + double>::test_teambroadcast(2, 1.3); + TestTeamBroadcast, + double>::test_teambroadcast(2, 1.3); + + TestTeamBroadcast, + double>::test_teambroadcast(16, 1.3); + TestTeamBroadcast, + double>::test_teambroadcast(16, 1.3); + + TestTeamBroadcast, + double>::test_teambroadcast(1000, 1.3); + TestTeamBroadcast, + double>::test_teambroadcast(1000, 1.3); } } // namespace Test diff --git a/core/unit_test/serial/TestSerial_TeamScratch.cpp b/core/unit_test/serial/TestSerial_TeamScratch.cpp index e70b3e34d61..5d10c0c5e9a 100644 --- a/core/unit_test/serial/TestSerial_TeamScratch.cpp +++ b/core/unit_test/serial/TestSerial_TeamScratch.cpp @@ -58,7 +58,6 @@ TEST(TEST_CATEGORY, team_scratch_request) { } #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) TEST(TEST_CATEGORY, team_lambda_shared_request) { TestLambdaSharedTeam >(); @@ -68,7 +67,6 @@ TEST(TEST_CATEGORY, team_lambda_shared_request) { TEST(TEST_CATEGORY, scratch_align) { TestScratchAlignment(); } #endif -#endif TEST(TEST_CATEGORY, shmem_size) { TestShmemSize(); } diff --git a/core/unit_test/standalone/Makefile b/core/unit_test/standalone/Makefile index c910cc5fd18..d60422233d6 100644 --- a/core/unit_test/standalone/Makefile +++ b/core/unit_test/standalone/Makefile @@ -9,7 +9,6 @@ ifndef KOKKOS_PATH endif SRC = $(wildcard $(MAKEFILE_PATH)*.cpp) -SRC += $(MAKEFILE_PATH)/../TestStackTrace.cpp HEADERS = $(wildcard $(MAKEFILE_PATH)*.hpp) HEADERS = $(wildcard $(MAKEFILE_PATH)/../*.hpp) @@ -42,7 +41,7 @@ include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) gtest-all.o - $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) gtest-all.o -o $(EXE) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) gtest-all.o -lpthread -o $(EXE) clean: kokkos-clean rm -f *.o *.cuda *.host diff --git a/core/unit_test/standalone/UnitTestMainInit.cpp b/core/unit_test/standalone/UnitTestMainInit.cpp index 8f8bac7b23c..c5d4a36677e 100644 --- a/core/unit_test/standalone/UnitTestMainInit.cpp +++ b/core/unit_test/standalone/UnitTestMainInit.cpp @@ -62,12 +62,15 @@ #ifdef KOKKOS_ENABLE_HPX #include #endif +#ifdef KOKKOS_ENABLE_OPENMPTARGET +#include +#endif #ifndef TEST_EXECSPACE #ifdef KOKKOS_ENABLE_SERIAL #include #endif #endif -#include +#include int main(int argc, char *argv[]) { Kokkos::initialize(argc, argv); diff --git a/containers/unit_tests/threads/TestThreads_BitSet.cpp b/core/unit_test/threads/TestThreads_Concepts.cpp similarity index 98% rename from containers/unit_tests/threads/TestThreads_BitSet.cpp rename to core/unit_test/threads/TestThreads_Concepts.cpp index ec93bb121cf..ed2d61ed3be 100644 --- a/containers/unit_tests/threads/TestThreads_BitSet.cpp +++ b/core/unit_test/threads/TestThreads_Concepts.cpp @@ -1,4 +1,3 @@ - /* //@HEADER // ************************************************************************ @@ -44,4 +43,4 @@ */ #include -#include +#include diff --git a/containers/unit_tests/threads/TestThreads_DualView.cpp b/core/unit_test/threads/TestThreads_RangePolicyRequire.cpp similarity index 98% rename from containers/unit_tests/threads/TestThreads_DualView.cpp rename to core/unit_test/threads/TestThreads_RangePolicyRequire.cpp index f6967bf0d4e..7a10b826585 100644 --- a/containers/unit_tests/threads/TestThreads_DualView.cpp +++ b/core/unit_test/threads/TestThreads_RangePolicyRequire.cpp @@ -44,4 +44,4 @@ */ #include -#include +#include diff --git a/core/unit_test/threads/TestThreads_Team.cpp b/core/unit_test/threads/TestThreads_Team.cpp index b931c5cb61a..5d2e3607342 100644 --- a/core/unit_test/threads/TestThreads_Team.cpp +++ b/core/unit_test/threads/TestThreads_Team.cpp @@ -80,25 +80,25 @@ TEST(TEST_CATEGORY, team_reduce) { } TEST(TEST_CATEGORY, team_broadcast) { - TestTeamBroadcast >::test_teambroadcast(0); - TestTeamBroadcast >::test_teambroadcast(0); + TestTeamBroadcast, + long>::test_teambroadcast(0, 1); + TestTeamBroadcast, + long>::test_teambroadcast(0, 1); - TestTeamBroadcast >::test_teambroadcast(2); - TestTeamBroadcast >::test_teambroadcast(2); + TestTeamBroadcast, + long>::test_teambroadcast(2, 1); + TestTeamBroadcast, + long>::test_teambroadcast(2, 1); - TestTeamBroadcast >::test_teambroadcast(16); - TestTeamBroadcast >::test_teambroadcast(16); + TestTeamBroadcast, + long>::test_teambroadcast(16, 1); + TestTeamBroadcast, + long>::test_teambroadcast(16, 1); - TestTeamBroadcast >:: - test_teambroadcast(1000); - TestTeamBroadcast >:: - test_teambroadcast(1000); + TestTeamBroadcast, + long>::test_teambroadcast(1000, 1); + TestTeamBroadcast, + long>::test_teambroadcast(1000, 1); } } // namespace Test diff --git a/core/unit_test/threads/TestThreads_TeamScratch.cpp b/core/unit_test/threads/TestThreads_TeamScratch.cpp index d793a9050fd..a37369fc049 100644 --- a/core/unit_test/threads/TestThreads_TeamScratch.cpp +++ b/core/unit_test/threads/TestThreads_TeamScratch.cpp @@ -58,7 +58,6 @@ TEST(TEST_CATEGORY, team_scratch_request) { } #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if !defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION) TEST(TEST_CATEGORY, team_lambda_shared_request) { TestLambdaSharedTeam >(); @@ -68,7 +67,6 @@ TEST(TEST_CATEGORY, team_lambda_shared_request) { TEST(TEST_CATEGORY, scratch_align) { TestScratchAlignment(); } #endif -#endif TEST(TEST_CATEGORY, shmem_size) { TestShmemSize(); } diff --git a/doc/design_notes_space_instances.md b/doc/design_notes_space_instances.md index 0124dfbc873..ce3d242998b 100644 --- a/doc/design_notes_space_instances.md +++ b/doc/design_notes_space_instances.md @@ -3,21 +3,21 @@ ## Objective * Enable Kokkos interoperability with coarse-grain tasking models - + ## Requirements - * Backwards compatable with existing Kokkos API - * Support existing Host execution spaces (Serial, Threads, OpenMP, maybe Qthreads) + * Backwards compatible with existing Kokkos API + * Support existing Host execution spaces (Serial, Threads, OpenMP) * Support DARMA threading model (may require a new Host execution space) * Support Uintah threading model, i.e. indepentant worker threadpools working of of shared task queues - - + + ## Execution Space * Parallel work is *dispatched* on an execution space instance - - * Execution space instances are conceptually disjoint/independant from each other - + + * Execution space instances are conceptually disjoint/independent from each other + ## Host Execution Space Instances @@ -35,7 +35,7 @@ * The control thread is a member of the instance - * The pool of threads associated with an instances is not mutatable during that instance existance + * The pool of threads associated with an instances is not mutatable during that instance existence * The pool of threads associated with an instance may be masked @@ -44,8 +44,8 @@ - Example: only one hyperthread per core of the instance - A mask can be applied during the policy creation of a parallel algorithm - - - Masking is portable by defining it as ceiling of fraction between [0.0, 1.0] + + - Masking is portable by defining it as ceiling of fraction between [0.0, 1.0] of the available resources ``` @@ -57,75 +57,75 @@ public: using array_layout = ...; using size_type = ...; using scratch_memory_space = ...; - - + + class Instance { int thread_pool_size( int depth = 0 ); ... }; - + class InstanceRequest { public: using Control = std::function< void( Instance * )>; - + InstanceRequest( Control control , unsigned thread_count , unsigned use_numa_count = 0 , unsigned use_cores_per_numa = 0 - ); - + ); + }; - + static bool in_parallel(); - + static bool sleep(); static bool wake(); - + static void fence(); - + static void print_configuration( std::ostream &, const bool detailed = false ); - + static void initialize( unsigned thread_count = 0 , unsigned use_numa_count = 0 , unsigned use_cores_per_numa = 0 ); - + // Partition the current instance into the requested instances // and run the given functions on the cooresponding instances - // will block until all the partitioned instances complete and - // the original instance will be restored + // will block until all the partitioned instances complete and + // the original instance will be restored // // Requires that the space has already been initialized // Requires that the request can be statisfied by the current instance - // i.e. the sum of number of requested threads must be less than the + // i.e. the sum of number of requested threads must be less than the // max_hardware_threads // // Each control functor will accept a handle to its new default instance - // Each instance must be independant of all other instances + // Each instance must be independent of all other instances // i.e. no assumption on scheduling between instances // The user is responible for checking the return code for errors static int run_instances( std::vector< InstanceRequest> const& requests ); - + static void finalize(); static int is_initialized(); - + static int concurrency(); - + static int thread_pool_size( int depth = 0 ); - + static int thread_pool_rank(); - + static int max_hardware_threads(); - + static int hardware_thread_id(); - + }; ``` - + diff --git a/doc/develop_builds.md b/doc/develop_builds.md index 9a211fa7764..080e43e115e 100644 --- a/doc/develop_builds.md +++ b/doc/develop_builds.md @@ -26,7 +26,7 @@ grouping by platform. ### generate_makefile.sh The bash code does not do any error checking on the `--arch=` or `--device=` -arguments thus strictly speaking you do not *need* to do anything to add a +arguments thus strictly speaking you do not *need* to do anything to add a device or architecture; however, you should add it to the help menu. For the archictectures, please group by one of the platforms listed above. @@ -37,7 +37,7 @@ The options for the CMake build system are: `-DKOKKOS_HOST_ARCH:STRING=` and `-DKOKKOS_ENABLE_:BOOL=`. Although any string can be passed into KOKKOS_HOST_ARCH option, it is checked against an accepted list. Likewise, the KOKKOS_ENABLE_ must have the option added AND it is formed using the -list. Thus: +list. Thus: + A new architecture should be added to the KOKKOS_HOST_ARCH_LIST variable. + A new device should be added to the KOKKOS_DEVICES_LIST variable **AND** a KOKKOS_ENABLE_ option specified (see KOKKOS_ENABLE_CUDA for @@ -46,7 +46,7 @@ list. Thus: The translation from option to the `KOKKOS_SETTINGS` is done in `kokkos_settings.cmake`. This translation is automated for some types if you ad -to the list, but for others, it may need to be hand coded. +to the list, but for others, it may need to be hand coded. ### Makefile.kokkos @@ -62,7 +62,7 @@ the Kokkos development team. This file is used to check the build system in a platform-independent way. It works by looping over available architectures and devices; thus, you should add -your new architecure to KOKKOS_ARCH_OPTIONS and your new device to +your new architecure to KOKKOS_ARCH_OPTIONS and your new device to KOKKOS_DEVICE_OPTIONS to be tested. The build system tests work by grepping the generated build files (automatically). The header file tests work by diffing the generated file with results that are stored in diff --git a/doc/hardware_identification/query_cuda_arch.cpp b/doc/hardware_identification/query_cuda_arch.cpp index 86efee2ab1c..879b3ca1e5c 100644 --- a/doc/hardware_identification/query_cuda_arch.cpp +++ b/doc/hardware_identification/query_cuda_arch.cpp @@ -13,7 +13,7 @@ int main() { case 5: printf("Maxwell"); break; case 6: printf("Pascal"); break; default: - fprintf(stderr, "Unspported Device %d%d\n", (int)prop.major, + fprintf(stderr, "Unsupported Device %d%d\n", (int)prop.major, (int)prop.minor); return -1; } diff --git a/doc/kokkos-promotion.txt b/doc/kokkos-promotion.txt index e36a6c98e28..81489e6e132 100644 --- a/doc/kokkos-promotion.txt +++ b/doc/kokkos-promotion.txt @@ -71,18 +71,18 @@ supported compilers. Those machines are: // -------------------------------------------------------------------------------- // Step 2: - 2.1. Build and test Trilinos with the following configurations: + 2.1. Build and test Trilinos with the following configurations: a) serial, openmp, and cuda via the testing scripts in kokkos-kernels/scripts/trilinos-integration (automates the process) b) various ATDM-supported builds via Trilinos configuration scripts located in kokkos{-kernels}/scripts/trilinos-integration/ATDM_configurations (not yet automated) - - Run scripts for automated testing on white (openmp and cuda) and blake (seral) that are provided in kokkos{-kernels}/scripts/trilinos-integration. + + Run scripts for automated testing on white (openmp and cuda) and blake (seral) that are provided in kokkos{-kernels}/scripts/trilinos-integration. These scripts load their own modules/environment, so don't require preparation. You can run all four at the same time, use separate directories for each. mkdir serial cd serial nohup KOKKOSKERNELS_PATH/scripts/trilinos-integration/blake_jenkins_run_script_serial_intel & - Use scripts to configure Trilinos for waterman (cuda, cuda-debug) and ride (cuda-rdc) that are provided in kokkos-kernels/scripts/trilinos-integration/ATDM_configurations. + Use scripts to configure Trilinos for waterman (cuda, cuda-debug) and ride (cuda-rdc) that are provided in kokkos-kernels/scripts/trilinos-integration/ATDM_configurations. These scripts load their own modules/environment, so don't require preparation of the system environment. You can run them all at the same time, just use separate directories for each. Instructions for compute node allocation, building, and testing are included in the scripts. @@ -97,12 +97,12 @@ Step 2: // -------------------------------------------------------------------------------- // Step 3: Close all issues labeled "InDevelop" - + Use the GitHub web interface: https://github.com/kokkos/kokkos/issues?q=is%3Aopen+is%3Aissue+label%3AInDevelop - Select all with checkbox in upper left, "Mark as closed" + Select all with checkbox in upper left, "Mark as closed" Use the GitHub web interface: https://github.com/kokkos/kokkos-kernels/issues?q=is%3Aopen+is%3Aissue+label%3AInDevelop - Select all with checkbox in upper left, "Mark as closed" + Select all with checkbox in upper left, "Mark as closed" // -------------------------------------------------------------------------------- // @@ -183,7 +183,7 @@ Step 4: This step should be run on kokkos-dev # Append to scripts/master_history.txt: tag: 2.03.13 date: 07:27:2017 master: da314444 develop: 29ccb58a - + git commit --amend -a Keep the merge commit as described in 4.6 @@ -210,7 +210,7 @@ Step 5: This step can be done on any SEMS machine (e.g. kokkos-dev). git clone -b kokkos-promotion git@github.com:trilinos/Trilinos.git TRILINOS_PATH=$PWD/Trilinos - 5.2. Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files. Run the following outside of the Kokkos and Trilinos source trees. + 5.2. Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files. Run the following outside of the Kokkos and Trilinos source trees. * Use the master branch of Kokkos for this. @@ -244,24 +244,24 @@ Step 5: This step can be done on any SEMS machine (e.g. kokkos-dev). // -------------------------------------------------------------------------------- // Step 6: Push Kokkos + KokkosKernels master and develop branches to respective GitHub repos (requires Owner permission). - + 6.1. Master branch: cd $KOKKOS_PATH git checkout master - git push --follow-tags origin master + git push --follow-tags origin master cd $KOKKOSKERNELS_PATH git checkout master - git push --follow-tags origin master + git push --follow-tags origin master 6.2. Develop branch: First merge (--no-ff) master back into develop cd $KOKKOS_PATH git checkout develop git merge --no-ff master - git push origin develop + git push origin develop cd $KOKKOSKERNELS_PATH git checkout develop git merge --no-ff master - git push origin develop + git push origin develop diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index c7d60995463..34157329d09 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,19 +2,11 @@ # Subpackage name must match what appears in kokkos/cmake/Dependencies.cmake # -#KOKKOS_SUBPACKAGE(Example) +KOKKOS_SUBPACKAGE(Example) -#KOKKOS_ADD_EXAMPLE_DIRECTORIES(query_device) -#KOKKOS_ADD_EXAMPLE_DIRECTORIES(fixture) -#KOKKOS_ADD_EXAMPLE_DIRECTORIES(feint) -#KOKKOS_ADD_EXAMPLE_DIRECTORIES(fenl) -#KOKKOS_ADD_EXAMPLE_DIRECTORIES(multi_fem) -#KOKKOS_ADD_EXAMPLE_DIRECTORIES(md_skeleton) -#KOKKOS_ADD_EXAMPLE_DIRECTORIES(global_2_local_ids) -#KOKKOS_ADD_EXAMPLE_DIRECTORIES(grow_array) -#KOKKOS_ADD_EXAMPLE_DIRECTORIES(sort_array) -#if(NOT Kokkos_ENABLE_CUDA) -#KOKKOS_ADD_EXAMPLE_DIRECTORIES(tutorial) -#endif() -#KOKKOS_SUBPACKAGE_POSTPROCESS() +KOKKOS_ADD_EXAMPLE_DIRECTORIES(query_device) +if(NOT Kokkos_ENABLE_CUDA) + KOKKOS_ADD_EXAMPLE_DIRECTORIES(tutorial) +endif() +KOKKOS_SUBPACKAGE_POSTPROCESS() diff --git a/example/README b/example/README index ec64004842b..66860512448 100644 --- a/example/README +++ b/example/README @@ -5,12 +5,5 @@ Tpetra. MANIFEST: - - common: Header files used by different examples - - feint: Unstructured finite-element method - - fixture: Some other finite-element method example - - global_2_local_ids: Example of global-to-local index lookup - - grow_array: Parallel dynamic memory allocation - - md_skeleton: Molecular dynamics - query_device: Kokkos' HWLOC wrapper for querying device topology - - sort_array: Parallel sort - tutorial: Kokkos tutorial (START HERE) diff --git a/example/build_cmake_in_tree/cmake_example.cpp b/example/build_cmake_in_tree/cmake_example.cpp index c6bbaeca818..63875d013f8 100644 --- a/example/build_cmake_in_tree/cmake_example.cpp +++ b/example/build_cmake_in_tree/cmake_example.cpp @@ -57,7 +57,7 @@ int main(int argc, char* argv[]) { exit(1); } - const long n = strtol(argv[1], NULL, 10); + const long n = strtol(argv[1], nullptr, 10); printf("Number of even integers from 0 to %ld\n", n - 1); diff --git a/example/build_cmake_installed/cmake_example.cpp b/example/build_cmake_installed/cmake_example.cpp index c6bbaeca818..63875d013f8 100644 --- a/example/build_cmake_installed/cmake_example.cpp +++ b/example/build_cmake_installed/cmake_example.cpp @@ -57,7 +57,7 @@ int main(int argc, char* argv[]) { exit(1); } - const long n = strtol(argv[1], NULL, 10); + const long n = strtol(argv[1], nullptr, 10); printf("Number of even integers from 0 to %ld\n", n - 1); diff --git a/example/cmake/Dependencies.cmake b/example/cmake/Dependencies.cmake deleted file mode 100644 index 741db6a0c42..00000000000 --- a/example/cmake/Dependencies.cmake +++ /dev/null @@ -1,3 +0,0 @@ -TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - TEST_OPTIONAL_TPLS CUSPARSE MKL HPX - ) diff --git a/example/query_device/query_device.cpp b/example/query_device/query_device.cpp index 0ce7fc9eca9..a563b06b286 100644 --- a/example/query_device/query_device.cpp +++ b/example/query_device/query_device.cpp @@ -59,6 +59,8 @@ int main(int argc, char** argv) { std::ostringstream msg; + (void)argc; + (void)argv; #if defined(KOKKOS_ENABLE_MPI) MPI_Init(&argc, &argv); diff --git a/example/tutorial/03_simple_view/Makefile b/example/tutorial/03_simple_view/Makefile index de994a8df92..c9dc3a0fd0e 100644 --- a/example/tutorial/03_simple_view/Makefile +++ b/example/tutorial/03_simple_view/Makefile @@ -43,7 +43,7 @@ include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) -#for unit testing only, for best preformance with OpenMP 4.0 or better +#for unit testing only, for best performance with OpenMP 4.0 or better test: $(EXE) ./$(EXE) diff --git a/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp b/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp index be7050a851b..a0771c4fcac 100644 --- a/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp +++ b/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp @@ -64,10 +64,10 @@ typedef Kokkos::HostSpace::execution_space DefaultHostType; // threads can grep their own. On CPU architectures the pool size is equal to // the thread number, on CUDA about 128k states are generated (enough to give // every potentially simultaneously running thread its own state). With a kernel -// a thread is required to aquire a state from the pool and later return it. On +// a thread is required to acquire a state from the pool and later return it. On // CPUs the Random number generator is deterministic if using the same number of // threads. On GPUs (i.e. using the CUDA backend it is not deterministic because -// threads aquire states via atomics. +// threads acquire states via atomics. // A Functor for generating uint64_t random numbers templated on the // GeneratorPool type @@ -97,7 +97,7 @@ struct generate_random { for (int k = 0; k < samples; k++) vals(i * samples + k) = rand_gen.urand64(); - // Give the state back, which will allow another thread to aquire it + // Give the state back, which will allow another thread to acquire it rand_pool.free_state(rand_gen); } }; diff --git a/example/tutorial/CMakeLists.txt b/example/tutorial/CMakeLists.txt index 2d8259506a4..fd471fa6bea 100644 --- a/example/tutorial/CMakeLists.txt +++ b/example/tutorial/CMakeLists.txt @@ -8,11 +8,9 @@ KOKKOS_ADD_EXAMPLE_DIRECTORIES(06_simple_mdrangepolicy) KOKKOS_ADD_EXAMPLE_DIRECTORIES(Advanced_Views) KOKKOS_ADD_EXAMPLE_DIRECTORIES(Hierarchical_Parallelism) -IF (Kokkos_ENABLE_CXX11) - KOKKOS_ADD_EXAMPLE_DIRECTORIES(01_hello_world_lambda) - KOKKOS_ADD_EXAMPLE_DIRECTORIES(02_simple_reduce_lambda) - KOKKOS_ADD_EXAMPLE_DIRECTORIES(03_simple_view_lambda) -ENDIF () +KOKKOS_ADD_EXAMPLE_DIRECTORIES(01_hello_world_lambda) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(02_simple_reduce_lambda) +KOKKOS_ADD_EXAMPLE_DIRECTORIES(03_simple_view_lambda) diff --git a/example/tutorial/README b/example/tutorial/README index 4ba0b3a5d9e..c46ace0d115 100644 --- a/example/tutorial/README +++ b/example/tutorial/README @@ -2,16 +2,11 @@ Build the examples by typing in each directory: make -j 16 To specify a target device: -make openmp -j 16 -make pthreads -j 16 -make serial -j 16 -make cuda -j 16 +KOKKOS_DEVICES=OpenMP make -j 16 +KOKKOS_DEVICES=Pthread make -j 16 +KOKKOS_DEVICES=Serial make -j 16 +KOKKOS_DEVICES=Cuda make -j 16 -The lambda variants can not be build with CUDA=yes at the moment, since -CUDA does not support lambdas from the host. Some of the advanced topics try to highlight performance impacts by timing different variants of doing the same thing. -Also some of the advanced topics (in particular hierarchical parallelism) -require C++11 even with out using host side lambdas. CUDA 6.5 can be used -to compile those. diff --git a/example/virtual_functions/Makefile b/example/virtual_functions/Makefile index 06186786c27..fbc54510da0 100644 --- a/example/virtual_functions/Makefile +++ b/example/virtual_functions/Makefile @@ -1,5 +1,5 @@ KOKKOS_DEVICES=Cuda -KOKKOS_CUDA_OPTIONS=enable_lambda +KOKKOS_CUDA_OPTIONS=enable_lambda,rdc KOKKOS_ARCH = "SNB,Kepler35" #KOKKOS_DEVICES=OpenMP diff --git a/generate_makefile.bash b/generate_makefile.bash index 7ef477b9f20..555f0b30a1a 100755 --- a/generate_makefile.bash +++ b/generate_makefile.bash @@ -1,6 +1,166 @@ #!/bin/bash -KOKKOS_DEVICES="" +update_kokkos_devices() { + SEARCH_TEXT="*$1*" + if [[ $KOKKOS_DEVICES == $SEARCH_TEXT ]]; then + echo kokkos devices already includes $SEARCH_TEXT + else + if [ "$KOKKOS_DEVICES" = "" ]; then + KOKKOS_DEVICES="$1" + echo reseting kokkos devices to $KOKKOS_DEVICES + else + KOKKOS_DEVICES="${KOKKOS_DEVICES},$1" + echo appending to kokkos devices $KOKKOS_DEVICES + fi + fi +} + +get_kokkos_device_list() { + KOKKOS_DEVICE_CMD= + PARSE_DEVICES_LST=$(echo $KOKKOS_DEVICES | tr "," "\n") + PARSE_DEVICES_LST=$(echo $PARSE_DEVICES_LST | tr "_" "\n") + for DEVICE_ in $PARSE_DEVICES_LST + do + UC_DEVICE=$(echo $DEVICE_ | tr "[:lower:]" "[:upper:]") + KOKKOS_DEVICE_CMD="-DKokkos_ENABLE_${UC_DEVICE}=ON ${KOKKOS_DEVICE_CMD}" + done +} + +get_kokkos_arch_list() { + KOKKOS_ARCH_CMD= + PARSE_ARCH_LST=$(echo $KOKKOS_ARCH | tr "," "\n") + for ARCH_ in $PARSE_ARCH_LST + do + UC_ARCH=$(echo $ARCH_ | tr "[:lower:]" "[:upper:]") + KOKKOS_ARCH_CMD="-DKokkos_ARCH_${UC_ARCH}=ON ${KOKKOS_ARCH_CMD}" + done +} + +get_kokkos_cuda_option_list() { + echo parsing KOKKOS_CUDA_OPTIONS=$KOKKOS_CUDA_OPTIONS + KOKKOS_CUDA_OPTION_CMD= + PARSE_CUDA_LST=$(echo $KOKKOS_CUDA_OPTIONS | tr "," "\n") + for CUDA_ in $PARSE_CUDA_LST + do + CUDA_OPT_NAME= + if [ "${CUDA_}" == "enable_lambda" ]; then + CUDA_OPT_NAME=CUDA_LAMBDA + elif [ "${CUDA_}" == "rdc" ]; then + CUDA_OPT_NAME=CUDA_RELOCATABLE_DEVICE_CODE + elif [ "${CUDA_}" == "force_uvm" ]; then + CUDA_OPT_NAME=CUDA_UVM + elif [ "${CUDA_}" == "use_ldg" ]; then + CUDA_OPT_NAME=CUDA_LDG_INTRINSIC + else + echo "${CUDA_} is not a valid cuda options..." + fi + if [ "${CUDA_OPT_NAME}" != "" ]; then + KOKKOS_CUDA_OPTION_CMD="-DKokkos_ENABLE_${CUDA_OPT_NAME}=ON ${KOKKOS_CUDA_OPTION_CMD}" + fi + done +} + +get_kokkos_option_list() { + echo parsing KOKKOS_OPTIONS=$KOKKOS_OPTIONS + KOKKOS_OPTION_CMD= + PARSE_OPTIONS_LST=$(echo $KOKKOS_OPTIONS | tr "," "\n") + for OPT_ in $PARSE_OPTIONS_LST + do + UC_OPT_=$(echo $OPT_ | tr "[:lower:]" "[:upper:]") + if [[ "$UC_OPT_" == *DISABLE* ]]; then + FLIP_OPT_=${UC_OPT_/DISABLE/ENABLE} + KOKKOS_OPTION_CMD="-DKokkos_${FLIP_OPT_}=OFF ${KOKKOS_OPTION_CMD}" + elif [[ "$UC_OPT_" == *ENABLE* ]]; then + KOKKOS_OPTION_CMD="-DKokkos_${UC_OPT_}=ON ${KOKKOS_OPTION_CMD}" + else + KOKKOS_OPTION_CMD="-DKokkos_ENABLE_${UC_OPT_}=ON ${KOKKOS_OPTION_CMD}" + fi + done +} + +display_help_text() { + + echo "Kokkos configure options:" + echo "" + echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory." + echo "--prefix=/Install/Path: Path to install the Kokkos library." + echo "" + echo "--with-cuda[=/Path/To/Cuda]: Enable Cuda and set path to Cuda Toolkit." + echo "--with-openmp: Enable OpenMP backend." + echo "--with-pthread: Enable Pthreads backend." + echo "--with-serial: Enable Serial backend." + echo "--with-devices: Explicitly add a set of backends." + echo "" + echo "--arch=[OPT]: Set target architectures. Options are:" + echo " [AMD]" + echo " AMDAVX = AMD CPU" + echo " EPYC = AMD EPYC Zen-Core CPU" + echo " [ARM]" + echo " ARMv80 = ARMv8.0 Compatible CPU" + echo " ARMv81 = ARMv8.1 Compatible CPU" + echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" + echo " ARMv8-TX2 = ARMv8 Cavium ThunderX2 CPU" + echo " [IBM]" + echo " BGQ = IBM Blue Gene Q" + echo " Power7 = IBM POWER7 and POWER7+ CPUs" + echo " Power8 = IBM POWER8 CPUs" + echo " Power9 = IBM POWER9 CPUs" + echo " [Intel]" + echo " WSM = Intel Westmere CPUs" + echo " SNB = Intel Sandy/Ivy Bridge CPUs" + echo " HSW = Intel Haswell CPUs" + echo " BDW = Intel Broadwell Xeon E-class CPUs" + echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" + echo " [Intel Xeon Phi]" + echo " KNC = Intel Knights Corner Xeon Phi" + echo " KNL = Intel Knights Landing Xeon Phi" + echo " [NVIDIA]" + echo " Kepler30 = NVIDIA Kepler generation CC 3.0" + echo " Kepler32 = NVIDIA Kepler generation CC 3.2" + echo " Kepler35 = NVIDIA Kepler generation CC 3.5" + echo " Kepler37 = NVIDIA Kepler generation CC 3.7" + echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" + echo " Maxwell52 = NVIDIA Maxwell generation CC 5.2" + echo " Maxwell53 = NVIDIA Maxwell generation CC 5.3" + echo " Pascal60 = NVIDIA Pascal generation CC 6.0" + echo " Pascal61 = NVIDIA Pascal generation CC 6.1" + echo " Volta70 = NVIDIA Volta generation CC 7.0" + echo " Volta72 = NVIDIA Volta generation CC 7.2" + echo "" + echo "--compiler=/Path/To/Compiler Set the compiler." + echo "--debug,-dbg: Enable Debugging." + echo "--disable-tests Disable compilation of unit tests (enabled by default)" + echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test" + echo " build. This will still set certain required" + echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp," + echo " --std=c++11, etc.)." + echo "--cxxstandard=[FLAGS] Overwrite KOKKOS_CXX_STANDARD for library build and test" + echo " c++11 (default), c++14, c++17, c++1y, c++1z, c++2a" + echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test" + echo " build. This will still set certain required" + echo " flags via KOKKOS_LDFLAGS (such as -fopenmp," + echo " -lpthread, etc.)." + echo "--with-gtest=/Path/To/Gtest: Set path to gtest. (Used in unit and performance" + echo " tests.)" + echo "--with-hwloc=/Path/To/Hwloc: Set path to hwloc library." + echo "--with-memkind=/Path/To/MemKind: Set path to memkind library." + echo "--with-options=[OPT]: Additional options to Kokkos:" + echo " compiler_warnings" + echo " aggressive_vectorization = add ivdep on loops" + echo " disable_profiling = do not compile with profiling hooks" + echo " " + echo "--with-cuda-options=[OPT]: Additional options to CUDA:" + echo " force_uvm, use_ldg, enable_lambda, rdc" + echo "--with-hpx-options=[OPT]: Additional options to HPX:" + echo " enable_async_dispatch" + echo "--gcc-toolchain=/Path/To/GccRoot: Set the gcc toolchain to use with clang (e.g. /usr)" + echo "--make-j=[NUM]: DEPRECATED: call make with appropriate" + echo " -j flag" + +} + +KOKKOS_DO_TESTS=ON +KOKKOS_DO_EXAMPLES=OFF while [[ $# > 0 ]] do @@ -17,68 +177,84 @@ do PREFIX="${key#*=}" ;; --with-cuda) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" + update_kokkos_devices Cuda CUDA_PATH_NVCC=$(command -v nvcc) CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc} ;; # Catch this before '--with-cuda*' --with-cuda-options*) - KOKKOS_CUDA_OPT="${key#*=}" + KOKKOS_CUDA_OPTIONS="${key#*=}" ;; --with-cuda*) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" + update_kokkos_devices Cuda CUDA_PATH="${key#*=}" ;; --with-openmp) - KOKKOS_DEVICES="${KOKKOS_DEVICES},OpenMP" + update_kokkos_devices OpenMP ;; --with-pthread) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Pthread" + update_kokkos_devices Pthread ;; --with-serial) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Serial" + update_kokkos_devices Serial ;; --with-hpx-options*) KOKKOS_HPX_OPT="${key#*=}" ;; --with-hpx*) - KOKKOS_DEVICES="${KOKKOS_DEVICES},HPX" + update_kokkos_devices HPX if [ -z "$HPX_PATH" ]; then HPX_PATH="${key#*=}" fi ;; --with-devices*) DEVICES="${key#*=}" - KOKKOS_DEVICES="${KOKKOS_DEVICES},${DEVICES}" + PARSE_DEVICES=$(echo $DEVICES | tr "," "\n") + for DEVICE_ in $PARSE_DEVICES + do + update_kokkos_devices $DEVICE_ + done ;; --with-gtest*) GTEST_PATH="${key#*=}" ;; --with-hwloc*) + KOKKOS_HWLOC=ON HWLOC_PATH="${key#*=}" ;; --with-memkind*) + KOKKOS_MEMKIND=ON MEMKIND_PATH="${key#*=}" ;; --arch*) KOKKOS_ARCH="${key#*=}" ;; --cxxflags*) - CXXFLAGS="${key#*=}" + KOKKOS_CXXFLAGS="${key#*=}" + KOKKOS_CXXFLAGS=${KOKKOS_CXXFLAGS//,/ } ;; --cxxstandard*) KOKKOS_CXX_STANDARD="${key#*=}" ;; --ldflags*) - LDFLAGS="${key#*=}" + KOKKOS_LDFLAGS="${key#*=}" ;; --debug|-dbg) - KOKKOS_DEBUG=yes + KOKKOS_DEBUG=ON ;; --make-j*) echo "Warning: ${key} is deprecated" echo "Call make with appropriate -j flag" ;; + --disable-tests) + KOKKOS_DO_TESTS=OFF + ;; + --no-examples) + KOKKOS_DO_EXAMPLES=OFF + ;; + --enable-examples) + KOKKOS_DO_EXAMPLES=ON + ;; --compiler*) COMPILER="${key#*=}" CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l) @@ -102,87 +278,13 @@ do COMPILER=${COMPDIR}/${COMPNAME} ;; --with-options*) - KOKKOS_OPT="${key#*=}" + KOKKOS_OPTIONS="${key#*=}" ;; --gcc-toolchain*) KOKKOS_GCC_TOOLCHAIN="${key#*=}" ;; --help) - echo "Kokkos configure options:" - echo "" - echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory." - echo "--prefix=/Install/Path: Path to install the Kokkos library." - echo "" - echo "--with-cuda[=/Path/To/Cuda]: Enable Cuda and set path to Cuda Toolkit." - echo "--with-openmp: Enable OpenMP backend." - echo "--with-pthread: Enable Pthreads backend." - echo "--with-serial: Enable Serial backend." - echo "--with-devices: Explicitly add a set of backends." - echo "" - echo "--arch=[OPT]: Set target architectures. Options are:" - echo " [AMD]" - echo " AMDAVX = AMD CPU" - echo " EPYC = AMD EPYC Zen-Core CPU" - echo " [ARM]" - echo " ARMv80 = ARMv8.0 Compatible CPU" - echo " ARMv81 = ARMv8.1 Compatible CPU" - echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" - echo " ARMv8-TX2 = ARMv8 Cavium ThunderX2 CPU" - echo " [IBM]" - echo " BGQ = IBM Blue Gene Q" - echo " Power7 = IBM POWER7 and POWER7+ CPUs" - echo " Power8 = IBM POWER8 CPUs" - echo " Power9 = IBM POWER9 CPUs" - echo " [Intel]" - echo " WSM = Intel Westmere CPUs" - echo " SNB = Intel Sandy/Ivy Bridge CPUs" - echo " HSW = Intel Haswell CPUs" - echo " BDW = Intel Broadwell Xeon E-class CPUs" - echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" - echo " [Intel Xeon Phi]" - echo " KNC = Intel Knights Corner Xeon Phi" - echo " KNL = Intel Knights Landing Xeon Phi" - echo " [NVIDIA]" - echo " Kepler30 = NVIDIA Kepler generation CC 3.0" - echo " Kepler32 = NVIDIA Kepler generation CC 3.2" - echo " Kepler35 = NVIDIA Kepler generation CC 3.5" - echo " Kepler37 = NVIDIA Kepler generation CC 3.7" - echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" - echo " Maxwell52 = NVIDIA Maxwell generation CC 5.2" - echo " Maxwell53 = NVIDIA Maxwell generation CC 5.3" - echo " Pascal60 = NVIDIA Pascal generation CC 6.0" - echo " Pascal61 = NVIDIA Pascal generation CC 6.1" - echo " Volta70 = NVIDIA Volta generation CC 7.0" - echo " Volta72 = NVIDIA Volta generation CC 7.2" - echo "" - echo "--compiler=/Path/To/Compiler Set the compiler." - echo "--debug,-dbg: Enable Debugging." - echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test" - echo " build. This will still set certain required" - echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp," - echo " --std=c++11, etc.)." - echo "--cxxstandard=[FLAGS] Overwrite KOKKOS_CXX_STANDARD for library build and test" - echo " c++11 (default), c++14, c++17, c++1y, c++1z, c++2a" - echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test" - echo " build. This will still set certain required" - echo " flags via KOKKOS_LDFLAGS (such as -fopenmp," - echo " -lpthread, etc.)." - echo "--with-gtest=/Path/To/Gtest: Set path to gtest. (Used in unit and performance" - echo " tests.)" - echo "--with-hwloc=/Path/To/Hwloc: Set path to hwloc library." - echo "--with-memkind=/Path/To/MemKind: Set path to memkind library." - echo "--with-options=[OPT]: Additional options to Kokkos:" - echo " compiler_warnings" - echo " aggressive_vectorization = add ivdep on loops" - echo " disable_profiling = do not compile with profiling hooks" - echo " " - echo "--with-cuda-options=[OPT]: Additional options to CUDA:" - echo " force_uvm, use_ldg, enable_lambda, rdc, enable_constexpr" - echo "--with-hpx-options=[OPT]: Additional options to HPX:" - echo " enable_async_dispatch" - echo "--gcc-toolchain=/Path/To/GccRoot: Set the gcc toolchain to use with clang (e.g. /usr)" - echo "--make-j=[NUM]: DEPRECATED: call make with appropriate" - echo " -j flag" + display_help_text exit 0 ;; *) @@ -193,265 +295,79 @@ do shift done -# Remove leading ',' from KOKKOS_DEVICES. -KOKKOS_DEVICES=$(echo $KOKKOS_DEVICES | sed 's/^,//') -# If KOKKOS_PATH undefined, assume parent dir of this script is the KOKKOS_PATH. -if [ -z "$KOKKOS_PATH" ]; then - KOKKOS_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) +if [ "$COMPILER" == "" ]; then + COMPILER_CMD= else - # Ensure KOKKOS_PATH is abs path - KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) -fi - -if [ "${KOKKOS_PATH}" = "${PWD}" ] || [ "${KOKKOS_PATH}" = "${PWD}/" ]; then - echo "Running generate_makefile.bash in the Kokkos root directory is not allowed" - exit -fi - -KOKKOS_SRC_PATH=${KOKKOS_PATH} - -KOKKOS_SETTINGS="KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}" - -# The double [[ ]] in the elif branch is not a typo -if [ ${#COMPILER} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}" -elif - [ ${#COMPILER} -eq 0 ] && [[ ${KOKKOS_DEVICES} =~ .*Cuda.* ]]; then - COMPILER="${KOKKOS_PATH}/bin/nvcc_wrapper" - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}" -fi - -if [ ${#KOKKOS_DEVICES} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEVICES=${KOKKOS_DEVICES}" -fi - -if [ ${#KOKKOS_ARCH} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_ARCH=${KOKKOS_ARCH}" -fi - -if [ ${#KOKKOS_DEBUG} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEBUG=${KOKKOS_DEBUG}" -fi - -if [ ${#CUDA_PATH} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CUDA_PATH=${CUDA_PATH}" -fi - -if [ ${#CXXFLAGS} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXXFLAGS=\"${CXXFLAGS}\"" -fi - -if [ ${#KOKKOS_CXX_STANDARD} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CXX_STANDARD=\"${KOKKOS_CXX_STANDARD}\"" -fi - -if [ ${#LDFLAGS} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} LDFLAGS=\"${LDFLAGS}\"" + COMPILER_CMD=-DCMAKE_CXX_COMPILER=$COMPILER fi -if [ ${#GTEST_PATH} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" +if [ "$KOKKOS_DEBUG" == "ON" ]; then + KOKKOS_DEBUG_CMD=-DCMAKE_BUILD_TYPE=DEBUG else - GTEST_PATH=${KOKKOS_PATH}/tpls/gtest - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" + KOKKOS_DEBUG_CMD=-DCMAKE_BUILD_TYPE=RELEASE fi -if [ ${#HWLOC_PATH} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HWLOC_PATH=${HWLOC_PATH}" - KOKKOS_USE_TPLS="${KOKKOS_USE_TPLS},hwloc" +if [ "$KOKKOS_HWLOC" == "ON" ]; then + KOKKOS_HWLOC_CMD=-DKokkos_ENABLE_HWLOC=ON + if [ "$HWLOC_PATH" != "" ]; then + KOKKOS_HWLOC_PATH_CMD=-DHWLOC_ROOT=$HWLOC_PATH + fi +else + KOKKOS_HWLOC_CMD= fi -if [ ${#MEMKIND_PATH} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} MEMKIND_PATH=${MEMKIND_PATH}" - KOKKOS_USE_TPLS="${KOKKOS_USE_TPLS},experimental_memkind" +if [ "$KOKKOS_MEMKIND" == "ON" ]; then + KOKKOS_MEMKIND_CMD=-DKokkos_ENABLE_MEMKIND=ON + if [ "$MEMKIND_PATH" != "" ]; then + KOKKOS_MEMKIND_PATH_CMD=-DMEMKIND_ROOT=$MEMKIND_PATH + fi +else + KOKKOS_MEMKIND_CMD= fi -if [ ${#KOKKOS_USE_TPLS} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_USE_TPLS=${KOKKOS_USE_TPLS}" +if [ ! -e ${KOKKOS_PATH}/CMakeLists.txt ]; then + if [ "${KOKKOS_PATH}" == "" ]; then + CM_SCRIPT=$0 + KOKKOS_PATH=`dirname $CM_SCRIPT` + if [ ! -e ${KOKKOS_PATH}/CMakeLists.txt ]; then + echo "${KOKKOS_PATH} repository appears to not be complete. please verify and try again" + exit 0 + fi + else + echo "KOKKOS_PATH does not appear to be set properly. please specify in location of CMakeLists.txt" + display_help_text + exit 0 + fi fi -if [ ${#HPX_PATH} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HPX_PATH=${HPX_PATH}" +get_kokkos_device_list +get_kokkos_option_list +get_kokkos_arch_list +get_kokkos_cuda_option_list + +## if HPX is enabled, we need to enforce cxx standard = 14 +if [[ ${KOKKOS_DEVICE_CMD} == *Kokkos_ENABLE_HPX* ]]; then + if [ "${KOKKOS_CXX_STANDARD}" == "" ] || [ ${#KOKKOS_CXX_STANDARD} -lt 14 ]; then + echo CXX Standard must be 14 or higher for HPX to work. + KOKKOS_CXX_STANDARD=14 + fi fi -if [ ${#KOKKOS_OPT} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_OPTIONS=${KOKKOS_OPT}" +if [ "$KOKKOS_CXX_STANDARD" == "" ]; then + STANDARD_CMD= +else + STANDARD_CMD=-DKokkos_CXX_STANDARD=${KOKKOS_CXX_STANDARD} fi -if [ ${#KOKKOS_CUDA_OPT} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}" -fi +if [[ ${COMPILER} == *clang* ]]; then + gcc_path=$(which g++ | awk --field-separator='/bin/g++' '{printf $1}' ) + KOKKOS_CXXFLAGS="${KOKKOS_CXXFLAGS} --gcc-toolchain=${gcc_path}" -if [ ${#KOKKOS_HPX_OPT} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_HPX_OPTIONS=${KOKKOS_HPX_OPT}" + if [ ! "${CUDA_PATH}" == "" ]; then + KOKKOS_CXXFLAGS="${KOKKOS_CXXFLAGS} --cuda-path=${CUDA_PATH}" + fi fi - -if [ ${#KOKKOS_GCC_TOOLCHAIN} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_INTERNAL_GCC_TOOLCHAIN=${KOKKOS_GCC_TOOLCHAIN}" -fi - -KOKKOS_SETTINGS_NO_KOKKOS_PATH="${KOKKOS_SETTINGS}" - - -gen_makefile=Makefile.kokkos -mkdir -p core -mkdir -p core/unit_test -mkdir -p core/perf_test -mkdir -p containers -mkdir -p containers/unit_tests -mkdir -p containers/performance_tests -mkdir -p algorithms -mkdir -p algorithms/unit_tests -mkdir -p algorithms/performance_tests -mkdir -p example -mkdir -p example/fixture -mkdir -p example/feint -mkdir -p example/fenl -mkdir -p example/make_buildlink -mkdir -p example/tutorial - -KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_PATH}" - -# Generate subdirectory makefiles. -echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > core/unit_test/Makefile -echo "" >> core/unit_test/Makefile -echo "all:" >> core/unit_test/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS}" >> core/unit_test/Makefile -echo "" >> core/unit_test/Makefile -echo "test: all" >> core/unit_test/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} test" >> core/unit_test/Makefile -echo "" >> core/unit_test/Makefile -echo "clean:" >> core/unit_test/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} clean" >> core/unit_test/Makefile - -echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > core/perf_test/Makefile -echo "" >> core/perf_test/Makefile -echo "all:" >> core/perf_test/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS}" >> core/perf_test/Makefile -echo "" >> core/perf_test/Makefile -echo "test: all" >> core/perf_test/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS} test" >> core/perf_test/Makefile -echo "" >> core/perf_test/Makefile -echo "clean:" >> core/perf_test/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS} clean" >> core/perf_test/Makefile - -echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > containers/unit_tests/Makefile -echo "" >> containers/unit_tests/Makefile -echo "all:" >> containers/unit_tests/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/unit_tests/Makefile -echo "" >> containers/unit_tests/Makefile -echo "test: all" >> containers/unit_tests/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> containers/unit_tests/Makefile -echo "" >> containers/unit_tests/Makefile -echo "clean:" >> containers/unit_tests/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/unit_tests/Makefile - -echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > containers/performance_tests/Makefile -echo "" >> containers/performance_tests/Makefile -echo "all:" >> containers/performance_tests/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/performance_tests/Makefile -echo "" >> containers/performance_tests/Makefile -echo "test: all" >> containers/performance_tests/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} test" >> containers/performance_tests/Makefile -echo "" >> containers/performance_tests/Makefile -echo "clean:" >> containers/performance_tests/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/performance_tests/Makefile - -echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > algorithms/unit_tests/Makefile -echo "" >> algorithms/unit_tests/Makefile -echo "all:" >> algorithms/unit_tests/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> algorithms/unit_tests/Makefile -echo "" >> algorithms/unit_tests/Makefile -echo "test: all" >> algorithms/unit_tests/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> algorithms/unit_tests/Makefile -echo "" >> algorithms/unit_tests/Makefile -echo "clean:" >> algorithms/unit_tests/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> algorithms/unit_tests/Makefile - -echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/fixture/Makefile -echo "" >> example/fixture/Makefile -echo "all:" >> example/fixture/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS}" >> example/fixture/Makefile -echo "" >> example/fixture/Makefile -echo "test: all" >> example/fixture/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} test" >> example/fixture/Makefile -echo "" >> example/fixture/Makefile -echo "clean:" >> example/fixture/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} clean" >> example/fixture/Makefile - -echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/feint/Makefile -echo "" >> example/feint/Makefile -echo "all:" >> example/feint/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS}" >> example/feint/Makefile -echo "" >> example/feint/Makefile -echo "test: all" >> example/feint/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} test" >> example/feint/Makefile -echo "" >> example/feint/Makefile -echo "clean:" >> example/feint/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} clean" >> example/feint/Makefile - -echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/fenl/Makefile -echo "" >> example/fenl/Makefile -echo "all:" >> example/fenl/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS}" >> example/fenl/Makefile -echo "" >> example/fenl/Makefile -echo "test: all" >> example/fenl/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} test" >> example/fenl/Makefile -echo "" >> example/fenl/Makefile -echo "clean:" >> example/fenl/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} clean" >> example/fenl/Makefile - -echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/make_buildlink/Makefile -echo "" >> example/make_buildlink/Makefile -echo "build:" >> example/make_buildlink/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/make_buildlink/Makefile ${KOKKOS_SETTINGS} build" >> example/make_buildlink/Makefile -echo "" >> example/make_buildlink/Makefile -echo "test: build" >> example/make_buildlink/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/make_buildlink/Makefile ${KOKKOS_SETTINGS} test" >> example/make_buildlink/Makefile -echo "" >> example/make_buildlink/Makefile -echo "clean:" >> example/make_buildlink/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/make_buildlink/Makefile ${KOKKOS_SETTINGS} clean" >> example/make_buildlink/Makefile - -echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/tutorial/Makefile -echo "" >> example/tutorial/Makefile -echo "build:" >> example/tutorial/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} build">> example/tutorial/Makefile -echo "" >> example/tutorial/Makefile -echo "test: build" >> example/tutorial/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} test" >> example/tutorial/Makefile -echo "" >> example/tutorial/Makefile -echo "clean:" >> example/tutorial/Makefile -echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} clean" >> example/tutorial/Makefile - -# Generate top level directory makefile. -echo "Generating Makefiles with options " ${KOKKOS_SETTINGS} -echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > Makefile -echo "" >> Makefile -echo "build-test:" >> Makefile -echo -e "\t\$(MAKE) -C core/unit_test" >> Makefile -echo -e "\t\$(MAKE) -C core/perf_test" >> Makefile -echo -e "\t\$(MAKE) -C containers/unit_tests" >> Makefile -echo -e "\t\$(MAKE) -C containers/performance_tests" >> Makefile -echo -e "\t\$(MAKE) -C algorithms/unit_tests" >> Makefile -echo "" >> Makefile -echo "test: build-test" >> Makefile -echo -e "\t\$(MAKE) -C core/unit_test test" >> Makefile -echo -e "\t\$(MAKE) -C core/perf_test test" >> Makefile -echo -e "\t\$(MAKE) -C containers/unit_tests test" >> Makefile -echo -e "\t\$(MAKE) -C containers/performance_tests test" >> Makefile -echo -e "\t\$(MAKE) -C algorithms/unit_tests test" >> Makefile -echo "" >> Makefile -echo "unit-tests-only:" >> Makefile -echo -e "\t\$(MAKE) -C core/unit_test test" >> Makefile -echo -e "\t\$(MAKE) -C containers/unit_tests test" >> Makefile -echo -e "\t\$(MAKE) -C algorithms/unit_tests test" >> Makefile -echo "" >> Makefile - -echo "clean:" >> Makefile -echo -e "\t\$(MAKE) -C core/unit_test clean" >> Makefile -echo -e "\t\$(MAKE) -C core/perf_test clean" >> Makefile -echo -e "\t\$(MAKE) -C containers/unit_tests clean" >> Makefile -echo -e "\t\$(MAKE) -C containers/performance_tests clean" >> Makefile -echo -e "\t\$(MAKE) -C algorithms/unit_tests clean" >> Makefile - + +echo cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=${KOKKOS_DO_TESTS} -DKokkos_ENABLE_EXAMPLES=${KOKKOS_DO_EXAMPLES} ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_HWLOC_CMD} ${KOKKOS_HWLOC_PATH_CMD} ${KOKKOS_MEMKIND_CMD} ${KOKKOS_MEMKIND_PATH_CMD} ${KOKKOS_PATH} +cmake $COMPILER_CMD -DCMAKE_CXX_FLAGS="${KOKKOS_CXXFLAGS//\"}" -DCMAKE_EXE_LINKER_FLAGS="${KOKKOS_LDFLAGS//\"}" -DCMAKE_INSTALL_PREFIX=${PREFIX} ${KOKKOS_DEVICE_CMD} ${KOKKOS_ARCH_CMD} -DKokkos_ENABLE_TESTS=${KOKKOS_DO_TESTS} -DKokkos_ENABLE_EXAMPLES=${KOKKOS_DO_EXAMPLES} ${KOKKOS_OPTION_CMD} ${KOKKOS_CUDA_OPTION_CMD} -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_CXX_EXTENSIONS=OFF ${STANDARD_CMD} ${KOKKOS_DEBUG_CMD} ${KOKKOS_HWLOC_CMD} ${KOKKOS_HWLOC_PATH_CMD} ${KOKKOS_MEMKIND_CMD} ${KOKKOS_MEMKIND_PATH_CMD} ${KOKKOS_PATH} diff --git a/gnu_generate_makefile.bash b/gnu_generate_makefile.bash new file mode 100755 index 00000000000..42b26bf4a4d --- /dev/null +++ b/gnu_generate_makefile.bash @@ -0,0 +1,484 @@ +#!/bin/bash + +KOKKOS_DEVICES="" + +KOKKOS_DO_EXAMPLES="1" + +while [[ $# > 0 ]] +do + key="$1" + + case $key in + --kokkos-path*) + KOKKOS_PATH="${key#*=}" + ;; + --hpx-path*) + HPX_PATH="${key#*=}" + ;; + --prefix*) + PREFIX="${key#*=}" + ;; + --with-cuda) + KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" + CUDA_PATH_NVCC=$(command -v nvcc) + CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc} + ;; + # Catch this before '--with-cuda*' + --with-cuda-options*) + KOKKOS_CUDA_OPT="${key#*=}" + ;; + --with-cuda*) + KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" + CUDA_PATH="${key#*=}" + ;; + --with-openmp) + KOKKOS_DEVICES="${KOKKOS_DEVICES},OpenMP" + ;; + --with-pthread) + KOKKOS_DEVICES="${KOKKOS_DEVICES},Pthread" + ;; + --with-serial) + KOKKOS_DEVICES="${KOKKOS_DEVICES},Serial" + ;; + --with-hpx-options*) + KOKKOS_HPX_OPT="${key#*=}" + ;; + --with-hpx*) + KOKKOS_DEVICES="${KOKKOS_DEVICES},HPX" + if [ -z "$HPX_PATH" ]; then + HPX_PATH="${key#*=}" + fi + ;; + --with-devices*) + DEVICES="${key#*=}" + KOKKOS_DEVICES="${KOKKOS_DEVICES},${DEVICES}" + ;; + --with-gtest*) + GTEST_PATH="${key#*=}" + ;; + --with-hwloc*) + HWLOC_PATH="${key#*=}" + ;; + --with-memkind*) + MEMKIND_PATH="${key#*=}" + ;; + --arch*) + KOKKOS_ARCH="${key#*=}" + ;; + --cxxflags*) + CXXFLAGS="${key#*=}" + ;; + --cxxstandard*) + KOKKOS_CXX_STANDARD="${key#*=}" + ;; + --ldflags*) + LDFLAGS="${key#*=}" + ;; + --debug|-dbg) + KOKKOS_DEBUG=yes + ;; + --make-j*) + echo "Warning: ${key} is deprecated" + echo "Call make with appropriate -j flag" + ;; + --no-examples) + KOKKOS_DO_EXAMPLES="0" + ;; + --compiler*) + COMPILER="${key#*=}" + CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l) + if [ ${CNUM} -gt 0 ]; then + echo "Invalid compiler by --compiler command: '${COMPILER}'" + exit + fi + if [[ ! -n ${COMPILER} ]]; then + echo "Empty compiler specified by --compiler command." + exit + fi + CNUM=$(command -v ${COMPILER} | grep ${COMPILER} | wc -l) + if [ ${CNUM} -eq 0 ]; then + echo "Invalid compiler by --compiler command: '${COMPILER}'" + exit + fi + # ... valid compiler, ensure absolute path set + WCOMPATH=$(command -v $COMPILER) + COMPDIR=$(dirname $WCOMPATH) + COMPNAME=$(basename $WCOMPATH) + COMPILER=${COMPDIR}/${COMPNAME} + ;; + --with-options*) + KOKKOS_OPT="${key#*=}" + ;; + --gcc-toolchain*) + KOKKOS_GCC_TOOLCHAIN="${key#*=}" + ;; + --help) + echo "Kokkos configure options:" + echo "" + echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory." + echo "--prefix=/Install/Path: Path to install the Kokkos library." + echo "" + echo "--with-cuda[=/Path/To/Cuda]: Enable Cuda and set path to Cuda Toolkit." + echo "--with-openmp: Enable OpenMP backend." + echo "--with-pthread: Enable Pthreads backend." + echo "--with-serial: Enable Serial backend." + echo "--with-devices: Explicitly add a set of backends." + echo "" + echo "--arch=[OPT]: Set target architectures. Options are:" + echo " [AMD]" + echo " AMDAVX = AMD CPU" + echo " EPYC = AMD EPYC Zen-Core CPU" + echo " [ARM]" + echo " ARMv80 = ARMv8.0 Compatible CPU" + echo " ARMv81 = ARMv8.1 Compatible CPU" + echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" + echo " ARMv8-TX2 = ARMv8 Cavium ThunderX2 CPU" + echo " [IBM]" + echo " BGQ = IBM Blue Gene Q" + echo " Power7 = IBM POWER7 and POWER7+ CPUs" + echo " Power8 = IBM POWER8 CPUs" + echo " Power9 = IBM POWER9 CPUs" + echo " [Intel]" + echo " WSM = Intel Westmere CPUs" + echo " SNB = Intel Sandy/Ivy Bridge CPUs" + echo " HSW = Intel Haswell CPUs" + echo " BDW = Intel Broadwell Xeon E-class CPUs" + echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" + echo " [Intel Xeon Phi]" + echo " KNC = Intel Knights Corner Xeon Phi" + echo " KNL = Intel Knights Landing Xeon Phi" + echo " [NVIDIA]" + echo " Kepler30 = NVIDIA Kepler generation CC 3.0" + echo " Kepler32 = NVIDIA Kepler generation CC 3.2" + echo " Kepler35 = NVIDIA Kepler generation CC 3.5" + echo " Kepler37 = NVIDIA Kepler generation CC 3.7" + echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" + echo " Maxwell52 = NVIDIA Maxwell generation CC 5.2" + echo " Maxwell53 = NVIDIA Maxwell generation CC 5.3" + echo " Pascal60 = NVIDIA Pascal generation CC 6.0" + echo " Pascal61 = NVIDIA Pascal generation CC 6.1" + echo " Volta70 = NVIDIA Volta generation CC 7.0" + echo " Volta72 = NVIDIA Volta generation CC 7.2" + echo "" + echo "--compiler=/Path/To/Compiler Set the compiler." + echo "--debug,-dbg: Enable Debugging." + echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test" + echo " build. This will still set certain required" + echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp," + echo " --std=c++11, etc.)." + echo "--cxxstandard=[FLAGS] Overwrite KOKKOS_CXX_STANDARD for library build and test" + echo " c++11 (default), c++14, c++17, c++1y, c++1z, c++2a" + echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test" + echo " build. This will still set certain required" + echo " flags via KOKKOS_LDFLAGS (such as -fopenmp," + echo " -lpthread, etc.)." + echo "--with-gtest=/Path/To/Gtest: Set path to gtest. (Used in unit and performance" + echo " tests.)" + echo "--with-hwloc=/Path/To/Hwloc: Set path to hwloc library." + echo "--with-memkind=/Path/To/MemKind: Set path to memkind library." + echo "--with-options=[OPT]: Additional options to Kokkos:" + echo " compiler_warnings" + echo " aggressive_vectorization = add ivdep on loops" + echo " disable_profiling = do not compile with profiling hooks" + echo " " + echo "--with-cuda-options=[OPT]: Additional options to CUDA:" + echo " force_uvm, use_ldg, enable_lambda, rdc, enable_constexpr" + echo "--with-hpx-options=[OPT]: Additional options to HPX:" + echo " enable_async_dispatch" + echo "--gcc-toolchain=/Path/To/GccRoot: Set the gcc toolchain to use with clang (e.g. /usr)" + echo "--make-j=[NUM]: DEPRECATED: call make with appropriate" + echo " -j flag" + exit 0 + ;; + *) + echo "warning: ignoring unknown option $key" + ;; + esac + + shift +done + +# Remove leading ',' from KOKKOS_DEVICES. +KOKKOS_DEVICES=$(echo $KOKKOS_DEVICES | sed 's/^,//') + +# If KOKKOS_PATH undefined, assume parent dir of this script is the KOKKOS_PATH. +if [ -z "$KOKKOS_PATH" ]; then + KOKKOS_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) +else + # Ensure KOKKOS_PATH is abs path + KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) +fi + +if [ "${KOKKOS_PATH}" = "${PWD}" ] || [ "${KOKKOS_PATH}" = "${PWD}/" ]; then + echo "Running generate_makefile.bash in the Kokkos root directory is not allowed" + exit +fi + +KOKKOS_SRC_PATH=${KOKKOS_PATH} + +KOKKOS_SETTINGS="KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}" + +# The double [[ ]] in the elif branch is not a typo +if [ ${#COMPILER} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}" +elif + [ ${#COMPILER} -eq 0 ] && [[ ${KOKKOS_DEVICES} =~ .*Cuda.* ]]; then + COMPILER="${KOKKOS_PATH}/bin/nvcc_wrapper" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}" +fi + +if [ ${#KOKKOS_DEVICES} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEVICES=${KOKKOS_DEVICES}" +fi + +if [ ${#KOKKOS_ARCH} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_ARCH=${KOKKOS_ARCH}" +fi + +if [ ${#KOKKOS_DEBUG} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEBUG=${KOKKOS_DEBUG}" +fi + +if [ ${#CUDA_PATH} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CUDA_PATH=${CUDA_PATH}" +fi + +if [ ${#CXXFLAGS} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXXFLAGS=\"${CXXFLAGS}\"" +fi + +if [ ${#KOKKOS_CXX_STANDARD} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CXX_STANDARD=\"${KOKKOS_CXX_STANDARD}\"" +fi + +if [ ${#LDFLAGS} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} LDFLAGS=\"${LDFLAGS}\"" +fi + +if [ ${#GTEST_PATH} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" +else + GTEST_PATH=${KOKKOS_PATH}/tpls/gtest + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" +fi + +if [ ${#HWLOC_PATH} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HWLOC_PATH=${HWLOC_PATH}" + KOKKOS_USE_TPLS="${KOKKOS_USE_TPLS},hwloc" +fi + +if [ ${#MEMKIND_PATH} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} MEMKIND_PATH=${MEMKIND_PATH}" + KOKKOS_USE_TPLS="${KOKKOS_USE_TPLS},experimental_memkind" +fi + +if [ ${#KOKKOS_USE_TPLS} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_USE_TPLS=${KOKKOS_USE_TPLS}" +fi + +if [ ${#HPX_PATH} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HPX_PATH=${HPX_PATH}" +fi + +if [ ${#KOKKOS_OPT} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_OPTIONS=${KOKKOS_OPT}" +fi + +if [ ${#KOKKOS_CUDA_OPT} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}" +fi + +if [ ${#KOKKOS_HPX_OPT} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_HPX_OPTIONS=${KOKKOS_HPX_OPT}" +fi + +if [ ${#KOKKOS_GCC_TOOLCHAIN} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_INTERNAL_GCC_TOOLCHAIN=${KOKKOS_GCC_TOOLCHAIN}" +fi + +KOKKOS_SETTINGS_NO_KOKKOS_PATH="${KOKKOS_SETTINGS}" + + +gen_makefile=Makefile.kokkos +mkdir -p core +mkdir -p core/unit_test +mkdir -p core/perf_test +mkdir -p containers +mkdir -p containers/unit_tests +mkdir -p containers/performance_tests +mkdir -p algorithms +mkdir -p algorithms/unit_tests +mkdir -p algorithms/performance_tests +mkdir -p example +mkdir -p example/fixture +mkdir -p example/feint +mkdir -p example/fenl +mkdir -p example/make_buildlink +mkdir -p example/tutorial + +KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_PATH}" + +# Generate subdirectory makefiles. +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > core/unit_test/Makefile +echo "" >> core/unit_test/Makefile +echo "all:" >> core/unit_test/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS}" >> core/unit_test/Makefile +echo "" >> core/unit_test/Makefile +echo "test: all" >> core/unit_test/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} test" >> core/unit_test/Makefile +echo "" >> core/unit_test/Makefile +echo "clean:" >> core/unit_test/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} clean" >> core/unit_test/Makefile + +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > core/perf_test/Makefile +echo "" >> core/perf_test/Makefile +echo "all:" >> core/perf_test/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS}" >> core/perf_test/Makefile +echo "" >> core/perf_test/Makefile +echo "test: all" >> core/perf_test/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS} test" >> core/perf_test/Makefile +echo "" >> core/perf_test/Makefile +echo "clean:" >> core/perf_test/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS} clean" >> core/perf_test/Makefile + +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > containers/unit_tests/Makefile +echo "" >> containers/unit_tests/Makefile +echo "all:" >> containers/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/unit_tests/Makefile +echo "" >> containers/unit_tests/Makefile +echo "test: all" >> containers/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> containers/unit_tests/Makefile +echo "" >> containers/unit_tests/Makefile +echo "clean:" >> containers/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/unit_tests/Makefile + +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > containers/performance_tests/Makefile +echo "" >> containers/performance_tests/Makefile +echo "all:" >> containers/performance_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/performance_tests/Makefile +echo "" >> containers/performance_tests/Makefile +echo "test: all" >> containers/performance_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} test" >> containers/performance_tests/Makefile +echo "" >> containers/performance_tests/Makefile +echo "clean:" >> containers/performance_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/performance_tests/Makefile + +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > algorithms/unit_tests/Makefile +echo "" >> algorithms/unit_tests/Makefile +echo "all:" >> algorithms/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> algorithms/unit_tests/Makefile +echo "" >> algorithms/unit_tests/Makefile +echo "test: all" >> algorithms/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> algorithms/unit_tests/Makefile +echo "" >> algorithms/unit_tests/Makefile +echo "clean:" >> algorithms/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> algorithms/unit_tests/Makefile + +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/fixture/Makefile +echo "" >> example/fixture/Makefile +echo "all:" >> example/fixture/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS}" >> example/fixture/Makefile +echo "" >> example/fixture/Makefile +echo "test: all" >> example/fixture/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} test" >> example/fixture/Makefile +echo "" >> example/fixture/Makefile +echo "clean:" >> example/fixture/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} clean" >> example/fixture/Makefile + +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/feint/Makefile +echo "" >> example/feint/Makefile +echo "all:" >> example/feint/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS}" >> example/feint/Makefile +echo "" >> example/feint/Makefile +echo "test: all" >> example/feint/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} test" >> example/feint/Makefile +echo "" >> example/feint/Makefile +echo "clean:" >> example/feint/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} clean" >> example/feint/Makefile + +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/fenl/Makefile +echo "" >> example/fenl/Makefile +echo "all:" >> example/fenl/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS}" >> example/fenl/Makefile +echo "" >> example/fenl/Makefile +echo "test: all" >> example/fenl/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} test" >> example/fenl/Makefile +echo "" >> example/fenl/Makefile +echo "clean:" >> example/fenl/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} clean" >> example/fenl/Makefile + +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/make_buildlink/Makefile +echo "" >> example/make_buildlink/Makefile +echo "build:" >> example/make_buildlink/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/make_buildlink/Makefile ${KOKKOS_SETTINGS} build" >> example/make_buildlink/Makefile +echo "" >> example/make_buildlink/Makefile +echo "test: build" >> example/make_buildlink/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/make_buildlink/Makefile ${KOKKOS_SETTINGS} test" >> example/make_buildlink/Makefile +echo "" >> example/make_buildlink/Makefile +echo "clean:" >> example/make_buildlink/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/make_buildlink/Makefile ${KOKKOS_SETTINGS} clean" >> example/make_buildlink/Makefile + +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/tutorial/Makefile +echo "" >> example/tutorial/Makefile +echo "build:" >> example/tutorial/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} build">> example/tutorial/Makefile +echo "" >> example/tutorial/Makefile +echo "test: build" >> example/tutorial/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} test" >> example/tutorial/Makefile +echo "" >> example/tutorial/Makefile +echo "clean:" >> example/tutorial/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} clean" >> example/tutorial/Makefile + +# Generate top level directory makefile. +echo "Generating Makefiles with options " ${KOKKOS_SETTINGS} +echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > Makefile +echo "" >> Makefile +echo "build-test:" >> Makefile +echo -e "\t\$(MAKE) -C core/unit_test" >> Makefile +echo -e "\t\$(MAKE) -C core/perf_test" >> Makefile +echo -e "\t\$(MAKE) -C containers/unit_tests" >> Makefile +echo -e "\t\$(MAKE) -C containers/performance_tests" >> Makefile +echo -e "\t\$(MAKE) -C algorithms/unit_tests" >> Makefile +if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then +$() +echo -e "\t\$(MAKE) -C example/fixture" >> Makefile +echo -e "\t\$(MAKE) -C example/feint" >> Makefile +echo -e "\t\$(MAKE) -C example/fenl" >> Makefile +echo -e "\t\$(MAKE) -C example/make_buildlink build" >> Makefile +echo -e "\t\$(MAKE) -C example/tutorial build" >> Makefile +fi +echo "" >> Makefile +echo "test: build-test" >> Makefile +echo -e "\t\$(MAKE) -C core/unit_test test" >> Makefile +echo -e "\t\$(MAKE) -C core/perf_test test" >> Makefile +echo -e "\t\$(MAKE) -C containers/unit_tests test" >> Makefile +echo -e "\t\$(MAKE) -C containers/performance_tests test" >> Makefile +echo -e "\t\$(MAKE) -C algorithms/unit_tests test" >> Makefile +if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then +echo -e "\t\$(MAKE) -C example/fixture test" >> Makefile +echo -e "\t\$(MAKE) -C example/feint test" >> Makefile +echo -e "\t\$(MAKE) -C example/fenl test" >> Makefile +echo -e "\t\$(MAKE) -C example/make_buildlink test" >> Makefile +echo -e "\t\$(MAKE) -C example/tutorial test" >> Makefile +fi +echo "" >> Makefile +echo "unit-tests-only:" >> Makefile +echo -e "\t\$(MAKE) -C core/unit_test test" >> Makefile +echo -e "\t\$(MAKE) -C containers/unit_tests test" >> Makefile +echo -e "\t\$(MAKE) -C algorithms/unit_tests test" >> Makefile +echo "" >> Makefile + +echo "clean:" >> Makefile +echo -e "\t\$(MAKE) -C core/unit_test clean" >> Makefile +echo -e "\t\$(MAKE) -C core/perf_test clean" >> Makefile +echo -e "\t\$(MAKE) -C containers/unit_tests clean" >> Makefile +echo -e "\t\$(MAKE) -C containers/performance_tests clean" >> Makefile +echo -e "\t\$(MAKE) -C algorithms/unit_tests clean" >> Makefile +if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then +echo -e "\t\$(MAKE) -C example/fixture clean" >> Makefile +echo -e "\t\$(MAKE) -C example/feint clean" >> Makefile +echo -e "\t\$(MAKE) -C example/fenl clean" >> Makefile +echo -e "\t\$(MAKE) -C example/make_buildlink clean" >> Makefile +echo -e "\t\$(MAKE) -C example/tutorial clean" >> Makefile +fi + diff --git a/master_history.txt b/master_history.txt index ee5238be8c9..f6eb95292c2 100644 --- a/master_history.txt +++ b/master_history.txt @@ -18,3 +18,4 @@ tag: 2.7.24 date: 11:04:2018 master: d3a94192 develop: 7a06fc81 tag: 2.8.00 date: 02:05:2019 master: 34931a36 develop: d1659d1d tag: 2.9.00 date: 06:24:2019 master: 5d6e7fb3 develop: 4c6cb80a tag: 3.0.00 date: 01:31:2020 master: 2983b80d release-candidate-3.0: fdc904a6 +tag: 3.1.00 date: 04:14:2020 master: cd1b1d0a develop: fd90af43 diff --git a/scripts/apply-clang-format b/scripts/apply-clang-format index fb804205a3f..c70cd3118bf 100755 --- a/scripts/apply-clang-format +++ b/scripts/apply-clang-format @@ -20,3 +20,11 @@ if [ "${CLANG_FORMAT_MAJOR_VERSION}" -ne 8 ] || [ "${CLANG_FORMAT_MINOR_VERSION} fi find . -name '*.cpp' -o -name '*.hpp' | xargs ${CLANG_FORMAT_EXECUTABLE} -i + +# Now also check for trailing whitspace. Mac OSX creates backup files +# that we need to delete manually. +find . -type f \( -name "*.md" -o -name "*.cc" -o -name "*.h" -o -name "*.txt" -o -name "*.cmake" \) | + xargs -n 1 -P 10 -I {} bash -c "sed -i -e 's/\s\+$//g' {} && rm -f '{}-e'" + +# Check that we do not introduce any file with the old copyright +./scripts/update-copyright diff --git a/scripts/docker/Dockerfile.clang b/scripts/docker/Dockerfile.clang index cd96094fbf2..680502f8659 100644 --- a/scripts/docker/Dockerfile.clang +++ b/scripts/docker/Dockerfile.clang @@ -42,6 +42,3 @@ RUN LLVM_VERSION=8.0.0 && \ rm -rf /root/.gnupg && \ rm -rf ${SCRATCH_DIR} ENV PATH=${LLVM_DIR}/bin:$PATH - -# Workaround to find libcudart -ENV LD_LIBRARY_PATH=/usr/local/cuda/targets/x86_64-linux/lib:${LD_LIBRARY_PATH} diff --git a/scripts/docker/Dockerfile.hipcc b/scripts/docker/Dockerfile.hipcc new file mode 100644 index 00000000000..597db7794ec --- /dev/null +++ b/scripts/docker/Dockerfile.hipcc @@ -0,0 +1,29 @@ +ARG BASE=rocm/dev-ubuntu-18.04:3.0 +FROM $BASE + +RUN apt-get update && apt-get install -y \ + kmod \ + wget \ + ccache \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV PATH=/opt/rocm/bin:$PATH + +ARG CMAKE_VERSION=3.10.3 +ENV CMAKE_DIR=/opt/cmake +RUN CMAKE_KEY=2D2CEF1034921684 && \ + CMAKE_URL=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION} && \ + CMAKE_SCRIPT=cmake-${CMAKE_VERSION}-Linux-x86_64.sh && \ + CMAKE_SHA256=cmake-${CMAKE_VERSION}-SHA-256.txt && \ + wget --quiet ${CMAKE_URL}/${CMAKE_SHA256} && \ + wget --quiet ${CMAKE_URL}/${CMAKE_SHA256}.asc && \ + wget --quiet ${CMAKE_URL}/${CMAKE_SCRIPT} && \ + gpg --keyserver hkps.pool.sks-keyservers.net --recv-keys ${CMAKE_KEY} && \ + gpg --verify ${CMAKE_SHA256}.asc ${CMAKE_SHA256} && \ + grep ${CMAKE_SCRIPT} ${CMAKE_SHA256} | sha256sum --check && \ + mkdir -p ${CMAKE_DIR} && \ + sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \ + rm ${CMAKE_SCRIPT} +ENV PATH=${CMAKE_DIR}/bin:$PATH diff --git a/scripts/docker/check_format_cpp.sh b/scripts/docker/check_format_cpp.sh index f96e8027df0..c054c67a335 100755 --- a/scripts/docker/check_format_cpp.sh +++ b/scripts/docker/check_format_cpp.sh @@ -1,75 +1,4 @@ #!/usr/bin/env bash -clang_format_executable=${CLANG_FORMAT_EXE:-clang-format} - -this_program=$(basename "$0") -usage="Usage: - $this_program [options] -- check format of the C++ source files - -Options: - -h --help Print help and exit - -q --quiet Quiet mode (do not print the diff) - -p --apply-patch Apply diff patch to the source files" - -verbose=1 -apply_patch=0 - -#echo "Arguments: $# $@" - -while [ $# -gt 0 ] -do - case $1 in - -p|--apply-patch) - apply_patch=1 - ;; - -q|--quiet) - verbose=0 - ;; - -h|--help) - echo "$usage" - exit 0 - ;; - *) - echo "$this_program: Unknown argument '$1'. See '$this_program --help'." - exit -1 - ;; - esac - - shift -done - -# stop right here if clang-format does not exist in $PATH -command -v $clang_format_executable >/dev/null 2>&1 || { echo >&2 "clang-format executable '$clang_format_executable' not found. Aborting."; exit 1; } - -# shamelessy redirecting everything to /dev/null in quiet mode -if [ $verbose -eq 0 ]; then - exec &>/dev/null -fi - -cpp_source_files=$(git ls-files | grep -E "\.hpp$|\.cpp$|\.h$|\.c$" | grep -v -f .clang-format-ignore) - -unformatted_files=() -for file in $cpp_source_files; do - diff -u \ - <(cat $file) \ - --label a/$file \ - <($clang_format_executable $file) \ - --label b/$file >&1 - if [ $? -eq 1 ]; then - unformatted_files+=($file) - fi -done - -n_unformatted_files=${#unformatted_files[@]} -if [ $n_unformatted_files -ne 0 ]; then - echo "${#unformatted_files[@]} file(s) not formatted properly:" - for file in ${unformatted_files[@]}; do - echo " $file" - if [ $apply_patch -eq 1 ]; then - $clang_format_executable -i $file - fi - done -else - echo "OK" -fi -exit $n_unformatted_files +./scripts/apply-clang-format || exit $? +git diff --exit-code diff --git a/scripts/snapshot.py b/scripts/snapshot.py index bfa97bf48a2..b964e2b676a 100755 --- a/scripts/snapshot.py +++ b/scripts/snapshot.py @@ -214,7 +214,7 @@ def find_git_commit_information(options): def do_git_commit(message, options): if options.verbose_mode: - print "Commiting to destination repository." + print "Committing to destination repository." git_add_cmd = ["git", "add", "-A"] run_cmd(git_add_cmd, options, options.destination) @@ -283,7 +283,7 @@ def main(options): options = parse_cmdline(__doc__) main(options) except RuntimeError, e: - print "Error occured:", e + print "Error occurred:", e if "--debug" in sys.argv: traceback.print_exc() sys.exit(1) diff --git a/scripts/testing_scripts/generate_makefile.bash b/scripts/testing_scripts/generate_makefile.bash index e685a0f3c15..e3614c57496 100755 --- a/scripts/testing_scripts/generate_makefile.bash +++ b/scripts/testing_scripts/generate_makefile.bash @@ -2,6 +2,8 @@ KOKKOS_DEVICES="" +KOKKOS_DO_EXAMPLES="1" + while [[ $# > 0 ]] do key="$1" @@ -79,6 +81,9 @@ do echo "Warning: ${key} is deprecated" echo "Call make with appropriate -j flag" ;; + --no-examples) + KOKKOS_DO_EXAMPLES="0" + ;; --compiler*) COMPILER="${key#*=}" CNUM=$(command -v ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l) @@ -271,10 +276,6 @@ if [ ${#KOKKOS_USE_TPLS} -gt 0 ]; then KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_USE_TPLS=${KOKKOS_USE_TPLS}" fi -if [ ${#QTHREADS_PATH} -gt 0 ]; then - KOKKOS_SETTINGS="${KOKKOS_SETTINGS} QTHREADS_PATH=${QTHREADS_PATH}" -fi - if [ ${#HPX_PATH} -gt 0 ]; then KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HPX_PATH=${HPX_PATH}" fi @@ -441,6 +442,14 @@ echo -e "\t\$(MAKE) -C core/perf_test" >> Makefile echo -e "\t\$(MAKE) -C containers/unit_tests" >> Makefile echo -e "\t\$(MAKE) -C containers/performance_tests" >> Makefile echo -e "\t\$(MAKE) -C algorithms/unit_tests" >> Makefile +if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then +$() +echo -e "\t\$(MAKE) -C example/fixture" >> Makefile +echo -e "\t\$(MAKE) -C example/feint" >> Makefile +echo -e "\t\$(MAKE) -C example/fenl" >> Makefile +echo -e "\t\$(MAKE) -C example/make_buildlink build" >> Makefile +echo -e "\t\$(MAKE) -C example/tutorial build" >> Makefile +fi echo "" >> Makefile echo "test: build-test" >> Makefile echo -e "\t\$(MAKE) -C core/unit_test test" >> Makefile @@ -448,6 +457,13 @@ echo -e "\t\$(MAKE) -C core/perf_test test" >> Makefile echo -e "\t\$(MAKE) -C containers/unit_tests test" >> Makefile echo -e "\t\$(MAKE) -C containers/performance_tests test" >> Makefile echo -e "\t\$(MAKE) -C algorithms/unit_tests test" >> Makefile +if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then +echo -e "\t\$(MAKE) -C example/fixture test" >> Makefile +echo -e "\t\$(MAKE) -C example/feint test" >> Makefile +echo -e "\t\$(MAKE) -C example/fenl test" >> Makefile +echo -e "\t\$(MAKE) -C example/make_buildlink test" >> Makefile +echo -e "\t\$(MAKE) -C example/tutorial test" >> Makefile +fi echo "" >> Makefile echo "unit-tests-only:" >> Makefile echo -e "\t\$(MAKE) -C core/unit_test test" >> Makefile @@ -461,4 +477,11 @@ echo -e "\t\$(MAKE) -C core/perf_test clean" >> Makefile echo -e "\t\$(MAKE) -C containers/unit_tests clean" >> Makefile echo -e "\t\$(MAKE) -C containers/performance_tests clean" >> Makefile echo -e "\t\$(MAKE) -C algorithms/unit_tests clean" >> Makefile +if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then +echo -e "\t\$(MAKE) -C example/fixture clean" >> Makefile +echo -e "\t\$(MAKE) -C example/feint clean" >> Makefile +echo -e "\t\$(MAKE) -C example/fenl clean" >> Makefile +echo -e "\t\$(MAKE) -C example/make_buildlink clean" >> Makefile +echo -e "\t\$(MAKE) -C example/tutorial clean" >> Makefile +fi diff --git a/scripts/testing_scripts/cm_test_all_sandia b/scripts/testing_scripts/gnu_test_all_sandia similarity index 80% rename from scripts/testing_scripts/cm_test_all_sandia rename to scripts/testing_scripts/gnu_test_all_sandia index b6d032f2f32..eea23f53e92 100755 --- a/scripts/testing_scripts/cm_test_all_sandia +++ b/scripts/testing_scripts/gnu_test_all_sandia @@ -8,65 +8,9 @@ set -o pipefail # Determine current machine. -print_help() { - echo "test_all_sandia :" - echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" - echo " Defaults to root repo containing this script" - echo "--debug: Run tests in debug. Defaults to False" - echo "--test-script: Test this script, not Kokkos" - echo "--skip-hwloc: Do not do hwloc tests" - echo "--num=N: Number of jobs to run in parallel" - echo "--spot-check: Minimal test set to issue pull request" - echo "--dry-run: Just print what would be executed" - echo "--build-only: Just do builds, don't run anything" - echo "--opt-flag=FLAG: Optimization flag (default: -O3)" - echo "--cxxflags-extra=FLAGS: Extra flags to be added to CXX_FLAGS" - echo "--ldflags-extra=FLAGS: Extra flags to be added to LD_FLAGS" - echo "--arch=ARCHITECTURE: overwrite architecture flags" - echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS" - echo "--with-options=OPT: set KOKKOS_OPTIONS" - echo "--build-list=BUILD,BUILD,BUILD..." - echo " Provide a comma-separated list of builds instead of running all builds" - echo " Valid items:" - echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial" - echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" - echo "" - - echo "ARGS: list of expressions matching compilers to test" - echo " supported compilers sems" - for COMPILER_DATA in "${COMPILERS[@]}"; do - ARR=($COMPILER_DATA) - COMPILER=${ARR[0]} - echo " $COMPILER" - done - echo "" - - echo "Examples:" - echo " Run all tests" - echo " % test_all_sandia" - echo "" - echo " Run all gcc tests" - echo " % test_all_sandia gcc" - echo "" - echo " Run all gcc/4.8.4 and all intel tests" - echo " % test_all_sandia gcc/4.8.4 intel" - echo "" - echo " Run all tests in debug" - echo " % test_all_sandia --debug" - echo "" - echo " Run gcc/4.8.4 and only do OpenMP and OpenMP_Serial builds" - echo " % test_all_sandia gcc/4.8.4 --build-list=OpenMP,OpenMP_Serial" - echo "" - echo "If you want to kill the tests, do:" - echo " hit ctrl-z" - echo " % kill -9 %1" - echo -} - MACHINE="" HOSTNAME=$(hostname) PROCESSOR=`uname -p` -CUDA_ENABLE_CMD= if [[ "$HOSTNAME" =~ (white|ride).* ]]; then MACHINE=white @@ -126,12 +70,12 @@ CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial" CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial" CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial" -GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" -IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" -CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -#CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" +GCC_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" +IBM_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CLANG_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +INTEL_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +#CUDA_WARNING_FLAGS="-Wunused-parameter,-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" PGI_WARNING_FLAGS="" # Default. Machine specific can override. @@ -151,7 +95,7 @@ CXX_FLAGS_EXTRA="" LD_FLAGS_EXTRA="" KOKKOS_OPTIONS="" -CXX_STANDARD="11" +CXX_STANDARD="c++11" # # Handle arguments. @@ -196,23 +140,16 @@ do OPT_FLAG="${key#*=}" ;; --with-cuda-options*) - KOKKOS_CUDA_OPTIONS="${key#*=}" - export KOKKOS_CUDA_OPTIONS + KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}" ;; --with-options*) - KOKKOS_OPTIONS="${key#*=}" - export KOKKOS_OPTIONS + KOKKOS_OPTIONS="--with-options=${key#*=}" ;; --cxxflags-extra*) CXX_FLAGS_EXTRA="${key#*=}" ;; --cxxstandard*) - FULL_CXX_STANDARD="${key#*=}" - if [[ ${FULL_CXX_STANDARD} == *++* ]]; then - CXX_STANDARD="${FULL_CXX_STANDARD#*++}" - else - CXX_STANDARD="${FULL_CXX_STANDARD}" - fi + CXX_STANDARD="${key#*=}" ;; --ldflags-extra*) LD_FLAGS_EXTRA="${key#*=}" @@ -239,7 +176,17 @@ else KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) fi +UNCOMMITTED=`cd ${KOKKOS_PATH}; git status --porcelain 2>/dev/null` +if ! [ -z "$UNCOMMITTED" ]; then + echo "WARNING!! THE FOLLOWING CHANGES ARE UNCOMMITTED!! :" + echo "$UNCOMMITTED" + echo "" +fi +GITSTATUS=`cd ${KOKKOS_PATH}; git log -n 1 --format=oneline` +echo "Repository Status: " ${GITSTATUS} +echo "" +echo "" # # Machine specific config. @@ -289,13 +236,10 @@ if [ "$MACHINE" = "sems" ]; then elif [ "$MACHINE" = "kokkos-dev" ]; then source /projects/sems/modulefiles/utils/sems-modules-init.sh - module load sems-cmake/3.12.2 - BASE_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,sems-/" - CUDA9_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,sems-/,sems-gcc/6.1.0,kokkos-hwloc/1.10.1/base" - CLANG7_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,sems-/,sems-cuda/9.2" + BASE_MODULE_LIST="sems-env,kokkos-env,kokkos-hwloc/1.10.1/base,sems-/" if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=Kepler35" + ARCH_FLAG="" fi if [ "$SPOT_CHECK" = "True" ]; then @@ -304,8 +248,6 @@ elif [ "$MACHINE" = "kokkos-dev" ]; then "gcc/7.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" "intel/17.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/4.0.1 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" - "clang/7.0.1 $CLANG7_MODULE_LIST "Cuda_OpenMP" clang++ $CLANG_WARNING_FLAGS" - "cuda/9.2 $CUDA9_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) else # Format: (compiler module-list build-list exe-name warning-flag) @@ -322,8 +264,6 @@ elif [ "$MACHINE" = "kokkos-dev" ]; then "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/4.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "clang/7.0.1 $CLANG7_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "cuda/9.2 $CUDA9_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi elif [ "$MACHINE" = "white" ]; then @@ -331,10 +271,10 @@ elif [ "$MACHINE" = "white" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="cmake/3.12.3,/" - IBM_MODULE_LIST="cmake/3.12.3,/xl/,gcc/7.2.0" - CUDA_MODULE_LIST="cmake/3.12.3,/,gcc/7.2.0,ibm/xl/16.1.0" - CUDA10_MODULE_LIST="cmake/3.12.3,/,gcc/7.4.0,ibm/xl/16.1.0" + BASE_MODULE_LIST="/" + IBM_MODULE_LIST="/xl/,gcc/7.2.0" + CUDA_MODULE_LIST="/,gcc/7.2.0,ibm/xl/16.1.0" + CUDA10_MODULE_LIST="/,gcc/7.4.0,ibm/xl/16.1.0" # Don't do pthread on white. GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" @@ -366,7 +306,7 @@ elif [ "$MACHINE" = "bowman" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="cmake/3.12.3,/compilers/" + BASE_MODULE_LIST="/compilers/" OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" @@ -384,8 +324,8 @@ elif [ "$MACHINE" = "mayer" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=96 - BASE_MODULE_LIST="cmake/3.12.2,/" -# ARM_MODULE_LIST="cmake/3.12.2,/" + BASE_MODULE_LIST="/" +# ARM_MODULE_LIST="/compilers/" # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gnu7/7.2.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" @@ -400,8 +340,6 @@ elif [ "$MACHINE" = "blake" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=32 - module load cmake/3.12.3 - BASE_MODULE_LIST="/" BASE_MODULE_LIST_INTEL="/compilers/" @@ -434,22 +372,22 @@ elif [ "$MACHINE" = "apollo" ]; then module load sems-git module load sems-tex - module load sems-cmake/3.12.2 + module load sems-cmake/3.5.2 module load sems-gdb module load binutils SKIP_HWLOC=True - BASE_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,sems-/" - CUDA_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-/,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" - CUDA9_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" - CUDA10_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" + GCC_MODULE_LIST="sems-env,kokkos-env,kokkos-hwloc/1.10.1/base,sems-/" + NONGCC_MODULE_LIST="sems-env,kokkos-env,sems-gcc/5.3.0,sems-/,kokkos-hwloc/1.10.1/base" + CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" + CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" + CUDA10_MODULE_LIST="sems-env,kokkos-env,/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" - CLANG_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,/,cuda/9.0.69" - CLANG7_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,sems-gcc/6.1.0,/,cuda/9.1" - NVCC_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,/,sems-gcc/5.3.0" - HPX_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,hpx/1.2.1,sems-gcc/6.1.0,binutils" - HPX3_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,compilers/hpx/1.3.0,sems-gcc/6.1.0,binutils" + CLANG_MODULE_LIST="sems-env,kokkos-env,/,cuda/9.0.69" + CLANG7_MODULE_LIST="sems-env,kokkos-env,/,cuda/9.1" + NVCC_MODULE_LIST="sems-env,kokkos-env,/,sems-gcc/5.3.0" + HPX_MODULE_LIST="sems-env,kokkos-env,hpx/1.2.1,sems-gcc/6.1.0,binutils" BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP" BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread" @@ -457,32 +395,30 @@ elif [ "$MACHINE" = "apollo" ]; then if [ "$SPOT_CHECK" = "True" ]; then # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" - "gcc/5.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" - "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" - "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" + COMPILERS=("gcc/4.8.4 $GCC_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" + "gcc/5.3.0 $GCC_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" + "intel/16.0.1 $NONGCC_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" + "clang/3.9.0 $NONGCC_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" "clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread,OpenMP" clang++ $CUDA_WARNING_FLAGS" "cuda/9.1 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" - "hpx/1.3.0 $HPX3_MODULE_LIST "HPX" g++ $PGI_WARNING_FLAGS" + "hpx/1.2.1 $HPX_MODULE_LIST "HPX" g++ $PGI_WARNING_FLAGS" ) else # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("cuda/9.1 $CUDA9_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + COMPILERS=("cuda/9.1 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/10.0 $CUDA10_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "clang/6.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" "clang/7.0 $CLANG7_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS" - "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "hpx/1.2.1 $HPX_MODULE_LIST "HPX" g++ $PGI_WARNING_FLAGS" - "hpx/1.3.0 $HPX3_MODULE_LIST "HPX" g++ $PGI_WARNING_FLAGS" + "gcc/4.8.4 $GCC_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.9.3 $GCC_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/5.3.0 $GCC_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/6.1.0 $GCC_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "intel/15.0.2 $NONGCC_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/16.0.1 $NONGCC_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.1 $NONGCC_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "clang/3.5.2 $NONGCC_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.6.1 $NONGCC_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" ) fi @@ -519,6 +455,7 @@ elif [ "$MACHINE" = "kokkos-dev-2" ]; then if [ "$SPOT_CHECK" = "True" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/7.3.0 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" + "gcc/8.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS" "gcc/9.1 $GCC91_MODULE_LIST "OpenMP,Serial" g++ $GCC_WARNING_FLAGS" "intel/18.0.5 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/8.0 $CLANG8_MODULE_LIST "Cuda_OpenMP,Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" @@ -535,7 +472,9 @@ elif [ "$MACHINE" = "kokkos-dev-2" ]; then "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/9.1 $GCC91_MODULE_LIST "OpenMP,Serial" g++ $GCC_WARNING_FLAGS" + "gcc/8.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/9.1 $GCC91_MODULE_LIST "$GCC_BUILD_LIST" g++ $GCC_WARNING_FLAGS" + "gcc/9.2.0 $BASE_MODULE_LIST "$GCC_BUILD_LIST" g++ $GCC_WARNING_FLAGS" "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" @@ -544,6 +483,7 @@ elif [ "$MACHINE" = "kokkos-dev-2" ]; then "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/5.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/7.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/9.0.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "pgi/19.4 $PGI_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS" ) fi @@ -566,21 +506,60 @@ declare -i NUM_RESULTS_TO_KEEP=7 RESULT_ROOT_PREFIX=TestAll if [ "$PRINT_HELP" = "True" ]; then - print_help - exit 0 -fi + echo "test_all_sandia :" + echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" + echo " Defaults to root repo containing this script" + echo "--debug: Run tests in debug. Defaults to False" + echo "--test-script: Test this script, not Kokkos" + echo "--skip-hwloc: Do not do hwloc tests" + echo "--num=N: Number of jobs to run in parallel" + echo "--spot-check: Minimal test set to issue pull request" + echo "--dry-run: Just print what would be executed" + echo "--build-only: Just do builds, don't run anything" + echo "--opt-flag=FLAG: Optimization flag (default: -O3)" + echo "--cxxflags-extra=FLAGS: Extra flags to be added to CXX_FLAGS" + echo "--cxxstandard=OPT: c++11 (default), c++14, c++17, c++1y, c++1z, c++2a" + echo "--ldflags-extra=FLAGS: Extra flags to be added to LD_FLAGS" + echo "--arch=ARCHITECTURE: overwrite architecture flags" + echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS" + echo "--build-list=BUILD,BUILD,BUILD..." + echo " Provide a comma-separated list of builds instead of running all builds" + echo " Valid items:" + echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial" + echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" + echo "" -UNCOMMITTED=`cd ${KOKKOS_PATH}; git status --porcelain 2>/dev/null` -if ! [ -z "$UNCOMMITTED" ]; then - echo "WARNING!! THE FOLLOWING CHANGES ARE UNCOMMITTED!! :" - echo "$UNCOMMITTED" + echo "ARGS: list of expressions matching compilers to test" + echo " supported compilers sems" + for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + COMPILER=${ARR[0]} + echo " $COMPILER" + done echo "" -fi -GITSTATUS=`cd ${KOKKOS_PATH}; git log -n 1 --format=oneline` -echo "Repository Status: " ${GITSTATUS} -echo "" -echo "" + echo "Examples:" + echo " Run all tests" + echo " % test_all_sandia" + echo "" + echo " Run all gcc tests" + echo " % test_all_sandia gcc" + echo "" + echo " Run all gcc/4.8.4 and all intel tests" + echo " % test_all_sandia gcc/4.8.4 intel" + echo "" + echo " Run all tests in debug" + echo " % test_all_sandia --debug" + echo "" + echo " Run gcc/4.8.4 and only do OpenMP and OpenMP_Serial builds" + echo " % test_all_sandia gcc/4.8.4 --build-list=OpenMP,OpenMP_Serial" + echo "" + echo "If you want to kill the tests, do:" + echo " hit ctrl-z" + echo " % kill -9 %1" + echo + exit 0 +fi # Set build type. if [ "$DEBUG" = "True" ]; then @@ -608,18 +587,10 @@ for ARG in $ARGS; do else echo "Tried to add $COMPILER twice" fi - fi + fi done done -if [ "$COMPILERS_TO_TEST" == "" ]; then - echo "-----------------------------------------------" - echo " !!!! Invalid Compiler provided '$ARGS' !!!!" - echo "-----------------------------------------------" - print_help - exit 0 -fi - # # Functions. # @@ -715,10 +686,6 @@ setup_env() { module list 2>&1 | grep "$mod" >& /dev/null || return 1 done - if [ -e ${CM_ALL_SCRIPT_PATH}/update_lib.sh ]; then - echo calling ${CM_ALL_SCRIPT_PATH}/update_lib.sh $MACHINE - source ${CM_ALL_SCRIPT_PATH}/update_lib.sh $MACHINE - fi return 0 } @@ -737,14 +704,8 @@ single_build_and_test() { local compiler_warning_flags=$(get_compiler_warning_flags $compiler) local compiler_exe=$(get_compiler_exe_name $compiler) - if [[ "$KOKKOS_OPTIONS" == "" ]]; then - local extra_args="--with-options=enable_large_mem_tests" - else - local extra_args= - fi - if [[ "$build_type" = hwloc* ]]; then - local extra_args="$extra_args --with-hwloc=$(dirname $(dirname $(which hwloc-info)))" + local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) fi if [[ "$OPT_FLAG" = "" ]]; then @@ -754,15 +715,25 @@ single_build_and_test() { if [[ "$build_type" = *debug* ]]; then local extra_args="$extra_args --debug" local cxxflags="-g $compiler_warning_flags" + local ldflags="-g" else local cxxflags="$OPT_FLAG $compiler_warning_flags" + local ldflags="${OPT_FLAG}" fi local cxxflags="${cxxflags} ${CXX_FLAGS_EXTRA}" - local ldflags="${LD_FLAGS_EXTRA}" + local ldflags="${ldflags} ${LD_FLAGS_EXTRA}" local cxx_standard="${CXX_STANDARD}" + if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then + local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS" + fi + if [[ "$KOKKOS_OPTIONS" != "" ]]; then + local extra_args="$extra_args $KOKKOS_OPTIONS" + else + local extra_args="$extra_args --with-options=enable_large_mem_tests" + fi echo " Starting job $desc" @@ -776,16 +747,9 @@ single_build_and_test() { run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; } fi else - LOCAL_KOKKOS_DEVICES=${build//_/,} - if [[ "$LOCAL_KOKKOS_DEVICES" = *Cuda* ]]; then - CUDA_ENABLE_CMD="--with-cuda=$CUDA_ROOT" - fi - echo "kokkos options: ${KOKKOS_OPTIONS}" - echo "kokkos devices: ${LOCAL_KOKKOS_DEVICES}" - echo "kokkos cxx: ${cxxflags}" - run_cmd ${KOKKOS_PATH}/cm_generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} --no-examples $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + run_cmd ${KOKKOS_PATH}/scripts/testing_scripts/gnu_generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } local -i build_start_time=$(date +%s) - run_cmd make -j 48 all >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } + run_cmd make -j 48 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } local -i build_end_time=$(date +%s) comment="build_time=$(($build_end_time-$build_start_time))" @@ -905,10 +869,6 @@ wait_summarize_and_exit() { # Main. # -CM_ALL_SCRIPT=$0 -CM_ALL_SCRIPT_PATH=`pwd` -CM_ALL_SCRIPT_PATH=${CM_ALL_SCRIPT_PATH}/`dirname $CM_ALL_SCRIPT` - ROOT_DIR=$(get_test_root_dir) mkdir -p $ROOT_DIR cd $ROOT_DIR diff --git a/scripts/testing_scripts/test_all_sandia b/scripts/testing_scripts/test_all_sandia index 7113a49134b..c41a83641c4 100755 --- a/scripts/testing_scripts/test_all_sandia +++ b/scripts/testing_scripts/test_all_sandia @@ -8,15 +8,77 @@ set -o pipefail # Determine current machine. +print_help() { + echo "test_all_sandia :" + echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" + echo " Defaults to root repo containing this script" + echo "--debug: Run tests in debug. Defaults to False" + echo "--test-script: Test this script, not Kokkos" + echo "--skip-hwloc: Do not do hwloc tests" + echo "--num=N: Number of jobs to run in parallel" + echo "--spot-check: Minimal test set to issue pull request" + echo "--dry-run: Just print what would be executed" + echo "--build-only: Just do builds, don't run anything" + echo "--opt-flag=FLAG: Optimization flag (default: -O3)" + echo "--cxxflags-extra=FLAGS: Extra flags to be added to CXX_FLAGS" + echo "--cxxstandard=OPT: c++11 (default), c++14, c++17, c++1y, c++1z, c++2a" + echo "--ldflags-extra=FLAGS: Extra flags to be added to LD_FLAGS" + echo "--arch=ARCHITECTURE: overwrite architecture flags" + echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS" + echo "--with-options=OPT: set KOKKOS_OPTIONS" + echo "--build-list=BUILD,BUILD,BUILD..." + echo " Provide a comma-separated list of builds instead of running all builds" + echo " Valid items:" + echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial" + echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" + echo "" + + echo "ARGS: list of expressions matching compilers to test" + echo " supported compilers sems" + for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + COMPILER=${ARR[0]} + echo " $COMPILER" + done + echo "" + + echo "Examples:" + echo " Run all tests" + echo " % test_all_sandia" + echo "" + echo " Run all gcc tests" + echo " % test_all_sandia gcc" + echo "" + echo " Run all gcc/4.8.4 and all intel tests" + echo " % test_all_sandia gcc/4.8.4 intel" + echo "" + echo " Run all tests in debug" + echo " % test_all_sandia --debug" + echo "" + echo " Run gcc/4.8.4 and only do OpenMP and OpenMP_Serial builds" + echo " % test_all_sandia gcc/4.8.4 --build-list=OpenMP,OpenMP_Serial" + echo "" + echo "If you want to kill the tests, do:" + echo " hit ctrl-z" + echo " % kill -9 %1" + echo +} + MACHINE="" HOSTNAME=$(hostname) PROCESSOR=`uname -p` +CUDA_ENABLE_CMD= if [[ "$HOSTNAME" =~ (white|ride).* ]]; then MACHINE=white module load git fi +if [[ "$HOSTNAME" =~ waterman.* ]]; then + MACHINE=waterman + module load git +fi + if [[ "$HOSTNAME" =~ .*bowman.* ]]; then MACHINE=bowman module load git @@ -70,12 +132,13 @@ CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial" CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial" CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial" -GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" -IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" -CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" -#CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" +GCC_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" +IBM_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CLANG_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +INTEL_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +INTEL15_WARNING_FLAGS="-Wall,-Wno-unused-variable,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +#CUDA_WARNING_FLAGS="-Wall,-Wunused-parameter,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized" PGI_WARNING_FLAGS="" # Default. Machine specific can override. @@ -95,7 +158,7 @@ CXX_FLAGS_EXTRA="" LD_FLAGS_EXTRA="" KOKKOS_OPTIONS="" -CXX_STANDARD="c++11" +CXX_STANDARD="11" # # Handle arguments. @@ -140,16 +203,23 @@ do OPT_FLAG="${key#*=}" ;; --with-cuda-options*) - KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}" + KOKKOS_CUDA_OPTIONS="${key#*=}" + export KOKKOS_CUDA_OPTIONS ;; --with-options*) - KOKKOS_OPTIONS="--with-options=${key#*=}" + KOKKOS_OPTIONS="${key#*=}" + export KOKKOS_OPTIONS ;; --cxxflags-extra*) CXX_FLAGS_EXTRA="${key#*=}" ;; --cxxstandard*) - CXX_STANDARD="${key#*=}" + FULL_CXX_STANDARD="${key#*=}" + if [[ ${FULL_CXX_STANDARD} == *++* ]]; then + CXX_STANDARD="${FULL_CXX_STANDARD#*++}" + else + CXX_STANDARD="${FULL_CXX_STANDARD}" + fi ;; --ldflags-extra*) LD_FLAGS_EXTRA="${key#*=}" @@ -176,17 +246,7 @@ else KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) fi -UNCOMMITTED=`cd ${KOKKOS_PATH}; git status --porcelain 2>/dev/null` -if ! [ -z "$UNCOMMITTED" ]; then - echo "WARNING!! THE FOLLOWING CHANGES ARE UNCOMMITTED!! :" - echo "$UNCOMMITTED" - echo "" -fi -GITSTATUS=`cd ${KOKKOS_PATH}; git log -n 1 --format=oneline` -echo "Repository Status: " ${GITSTATUS} -echo "" -echo "" # # Machine specific config. @@ -199,8 +259,9 @@ if [ "$MACHINE" = "sems" ]; then # On rhel7 sems machines gcc/7.3.0, clang/4.0.1, and intel/16.0.3 are missing # Remove kokkkos-env module use - BASE_MODULE_LIST="sems-env,sems-/" - CUDA9_MODULE_LIST="sems-env,sems-/,sems-gcc/7.2.0" + module load sems-cmake/3.12.2 + BASE_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-/" + CUDA9_MODULE_LIST="sems-env,sems-cmake/3.12.2,sems-/,sems-gcc/7.2.0" SKIP_HWLOC=True # No sems hwloc module @@ -223,7 +284,7 @@ if [ "$MACHINE" = "sems" ]; then "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/7.2.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL15_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" @@ -236,10 +297,13 @@ if [ "$MACHINE" = "sems" ]; then elif [ "$MACHINE" = "kokkos-dev" ]; then source /projects/sems/modulefiles/utils/sems-modules-init.sh - BASE_MODULE_LIST="sems-env,kokkos-env,kokkos-hwloc/1.10.1/base,sems-/" + module load sems-cmake/3.12.2 + BASE_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,sems-/" + CUDA9_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,sems-/,sems-gcc/6.1.0,kokkos-hwloc/1.10.1/base" + CLANG7_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,sems-/,sems-cuda/9.2" if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="" + ARCH_FLAG="--arch=Kepler35" fi if [ "$SPOT_CHECK" = "True" ]; then @@ -248,6 +312,8 @@ elif [ "$MACHINE" = "kokkos-dev" ]; then "gcc/7.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" "intel/17.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/4.0.1 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" + "clang/7.0.1 $CLANG7_MODULE_LIST "Cuda_OpenMP" clang++ $CLANG_WARNING_FLAGS" + "cuda/9.2 $CUDA9_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) else # Format: (compiler module-list build-list exe-name warning-flag) @@ -256,7 +322,7 @@ elif [ "$MACHINE" = "kokkos-dev" ]; then "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL15_WARNING_FLAGS" "intel/16.0.3 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" @@ -264,6 +330,8 @@ elif [ "$MACHINE" = "kokkos-dev" ]; then "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/4.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/7.0.1 $CLANG7_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "cuda/9.2 $CUDA9_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi elif [ "$MACHINE" = "white" ]; then @@ -271,10 +339,10 @@ elif [ "$MACHINE" = "white" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="/" - IBM_MODULE_LIST="/xl/,gcc/7.2.0" - CUDA_MODULE_LIST="/,gcc/7.2.0,ibm/xl/16.1.0" - CUDA10_MODULE_LIST="/,gcc/7.4.0,ibm/xl/16.1.0" + BASE_MODULE_LIST="cmake/3.12.3,/" + IBM_MODULE_LIST="cmake/3.12.3,/xl/,gcc/7.2.0" + CUDA_MODULE_LIST="cmake/3.12.3,/,gcc/7.2.0,ibm/xl/16.1.1" + CUDA10_MODULE_LIST="cmake/3.12.3,/,gcc/7.4.0,ibm/xl/16.1.1" # Don't do pthread on white. GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" @@ -283,17 +351,19 @@ elif [ "$MACHINE" = "white" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST "OpenMP_Serial" g++ $GCC_WARNING_FLAGS" "gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + "ibm/16.1.1 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" "cuda/9.2.88 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/10.1.105 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) else # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "ibm/16.1.0 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + "gcc/7.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" "ibm/16.1.1 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" "cuda/9.2.88 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/10.0.130 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/10.1.105 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi @@ -301,12 +371,49 @@ elif [ "$MACHINE" = "white" ]; then ARCH_FLAG="--arch=Power8,Kepler37" fi +elif [ "$MACHINE" = "waterman" ]; then + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + + BASE_MODULE_LIST="cmake/3.12.3,/" + IBM_MODULE_LIST="cmake/3.12.3,/xl/,gcc/7.2.0" + CUDA_MODULE_LIST="cmake/3.12.3,/,gcc/7.2.0,ibm/xl/16.1.1" + CUDA10_MODULE_LIST="cmake/3.12.3,/,gcc/7.4.0,ibm/xl/16.1.1" + + # Don't do pthread on white. + GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" + + if [ "$SPOT_CHECK" = "True" ]; then + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST "OpenMP_Serial" g++ $GCC_WARNING_FLAGS" + "gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "ibm/16.1.1 $IBM_MODULE_LIST "Serial" xlC $IBM_WARNING_FLAGS" + "cuda/9.2.88 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/10.1.243 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + else + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/6.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/7.2.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/7.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "ibm/16.1.1 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + "cuda/9.2.88 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/10.0.130 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/10.1.105 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/10.1.243 $CUDA10_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + fi + + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=Power9,Volta70" + fi + elif [ "$MACHINE" = "bowman" ]; then source /etc/profile.d/modules.sh SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="/compilers/" + BASE_MODULE_LIST="cmake/3.12.3,/compilers/" OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" @@ -324,12 +431,12 @@ elif [ "$MACHINE" = "mayer" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=96 - BASE_MODULE_LIST="/" -# ARM_MODULE_LIST="/compilers/" + BASE_MODULE_LIST="cmake/3.14.5,/" +# ARM_MODULE_LIST="cmake/3.12.2,/" # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gnu7/7.2.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "arm/19.2 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST armclang++ $CLANG_WARNING_FLAGS") + "arm/20.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST armclang++ $CLANG_WARNING_FLAGS") if [ -z "$ARCH_FLAG" ]; then ARCH_FLAG="--arch=ARMv8-TX2" @@ -340,6 +447,8 @@ elif [ "$MACHINE" = "blake" ]; then SKIP_HWLOC=True export SLURM_TASKS_PER_NODE=32 + module load cmake/3.12.3 + BASE_MODULE_LIST="/" BASE_MODULE_LIST_INTEL="/compilers/" @@ -372,22 +481,22 @@ elif [ "$MACHINE" = "apollo" ]; then module load sems-git module load sems-tex - module load sems-cmake/3.5.2 + module load sems-cmake/3.12.2 module load sems-gdb module load binutils SKIP_HWLOC=True - GCC_MODULE_LIST="sems-env,kokkos-env,kokkos-hwloc/1.10.1/base,sems-/" - NONGCC_MODULE_LIST="sems-env,kokkos-env,sems-gcc/5.3.0,sems-/,kokkos-hwloc/1.10.1/base" - CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" - CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" - CUDA10_MODULE_LIST="sems-env,kokkos-env,/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" + BASE_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-hwloc/1.10.1/base,sems-/" + CUDA_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-/,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" + CUDA9_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,kokkos-/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" + CUDA10_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,/,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" - CLANG_MODULE_LIST="sems-env,kokkos-env,/,cuda/9.0.69" - CLANG7_MODULE_LIST="sems-env,kokkos-env,/,cuda/9.1" - NVCC_MODULE_LIST="sems-env,kokkos-env,/,sems-gcc/5.3.0" - HPX_MODULE_LIST="sems-env,kokkos-env,hpx/1.2.1,sems-gcc/6.1.0,binutils" + CLANG_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,/,cuda/9.0.69" + CLANG7_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,sems-gcc/6.1.0,/,cuda/9.1" + NVCC_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,/,sems-gcc/5.3.0" + HPX_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,hpx/1.2.1,sems-gcc/6.1.0,binutils" + HPX3_MODULE_LIST="sems-env,sems-cmake/3.12.2,kokkos-env,compilers/hpx/1.3.0,sems-gcc/6.1.0,binutils" BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP" BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread" @@ -395,30 +504,32 @@ elif [ "$MACHINE" = "apollo" ]; then if [ "$SPOT_CHECK" = "True" ]; then # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/4.8.4 $GCC_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" - "gcc/5.3.0 $GCC_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" - "intel/16.0.1 $NONGCC_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" - "clang/3.9.0 $NONGCC_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" + COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" + "gcc/5.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" + "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" + "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" "clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread,OpenMP" clang++ $CUDA_WARNING_FLAGS" "cuda/9.1 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" - "hpx/1.2.1 $HPX_MODULE_LIST "HPX" g++ $PGI_WARNING_FLAGS" + "hpx/1.3.0 $HPX3_MODULE_LIST "HPX" g++ $PGI_WARNING_FLAGS" ) else # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("cuda/9.1 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + COMPILERS=("cuda/9.1 $CUDA9_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/10.0 $CUDA10_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "clang/6.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" "clang/7.0 $CLANG7_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS" - "gcc/4.8.4 $GCC_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/4.9.3 $GCC_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/5.3.0 $GCC_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/6.1.0 $GCC_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "intel/15.0.2 $NONGCC_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/16.0.1 $NONGCC_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/17.0.1 $NONGCC_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "clang/3.5.2 $NONGCC_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "clang/3.6.1 $NONGCC_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL15_WARNING_FLAGS" + "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "hpx/1.2.1 $HPX_MODULE_LIST "HPX" g++ $PGI_WARNING_FLAGS" + "hpx/1.3.0 $HPX3_MODULE_LIST "HPX" g++ $PGI_WARNING_FLAGS" ) fi @@ -455,6 +566,7 @@ elif [ "$MACHINE" = "kokkos-dev-2" ]; then if [ "$SPOT_CHECK" = "True" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/7.3.0 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS" + "gcc/8.3.0 $BASE_MODULE_LIST "OpenMP" g++ $GCC_WARNING_FLAGS" "gcc/9.1 $GCC91_MODULE_LIST "OpenMP,Serial" g++ $GCC_WARNING_FLAGS" "intel/18.0.5 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/8.0 $CLANG8_MODULE_LIST "Cuda_OpenMP,Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" @@ -471,15 +583,21 @@ elif [ "$MACHINE" = "kokkos-dev-2" ]; then "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/7.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/9.1 $GCC91_MODULE_LIST "OpenMP,Serial" g++ $GCC_WARNING_FLAGS" - "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "gcc/8.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/9.1 $GCC91_MODULE_LIST "$GCC_BUILD_LIST" g++ $GCC_WARNING_FLAGS" + "gcc/9.2.0 $BASE_MODULE_LIST "$GCC_BUILD_LIST" g++ $GCC_WARNING_FLAGS" + "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL15_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/18.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/19.0.5 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/5.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/7.0.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/9.0.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "pgi/19.4 $PGI_MODULE_LIST $GCC_BUILD_LIST pgc++ $PGI_WARNING_FLAGS" ) fi @@ -502,60 +620,22 @@ declare -i NUM_RESULTS_TO_KEEP=7 RESULT_ROOT_PREFIX=TestAll if [ "$PRINT_HELP" = "True" ]; then - echo "test_all_sandia :" - echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" - echo " Defaults to root repo containing this script" - echo "--debug: Run tests in debug. Defaults to False" - echo "--test-script: Test this script, not Kokkos" - echo "--skip-hwloc: Do not do hwloc tests" - echo "--num=N: Number of jobs to run in parallel" - echo "--spot-check: Minimal test set to issue pull request" - echo "--dry-run: Just print what would be executed" - echo "--build-only: Just do builds, don't run anything" - echo "--opt-flag=FLAG: Optimization flag (default: -O3)" - echo "--cxxflags-extra=FLAGS: Extra flags to be added to CXX_FLAGS" - echo "--ldflags-extra=FLAGS: Extra flags to be added to LD_FLAGS" - echo "--arch=ARCHITECTURE: overwrite architecture flags" - echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS" - echo "--build-list=BUILD,BUILD,BUILD..." - echo " Provide a comma-separated list of builds instead of running all builds" - echo " Valid items:" - echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial" - echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" - echo "" - - echo "ARGS: list of expressions matching compilers to test" - echo " supported compilers sems" - for COMPILER_DATA in "${COMPILERS[@]}"; do - ARR=($COMPILER_DATA) - COMPILER=${ARR[0]} - echo " $COMPILER" - done - echo "" + print_help + exit 0 +fi - echo "Examples:" - echo " Run all tests" - echo " % test_all_sandia" - echo "" - echo " Run all gcc tests" - echo " % test_all_sandia gcc" - echo "" - echo " Run all gcc/4.8.4 and all intel tests" - echo " % test_all_sandia gcc/4.8.4 intel" - echo "" - echo " Run all tests in debug" - echo " % test_all_sandia --debug" - echo "" - echo " Run gcc/4.8.4 and only do OpenMP and OpenMP_Serial builds" - echo " % test_all_sandia gcc/4.8.4 --build-list=OpenMP,OpenMP_Serial" +UNCOMMITTED=`cd ${KOKKOS_PATH}; git status --porcelain 2>/dev/null` +if ! [ -z "$UNCOMMITTED" ]; then + echo "WARNING!! THE FOLLOWING CHANGES ARE UNCOMMITTED!! :" + echo "$UNCOMMITTED" echo "" - echo "If you want to kill the tests, do:" - echo " hit ctrl-z" - echo " % kill -9 %1" - echo - exit 0 fi +GITSTATUS=`cd ${KOKKOS_PATH}; git log -n 1 --format=oneline` +echo "Repository Status: " ${GITSTATUS} +echo "" +echo "" + # Set build type. if [ "$DEBUG" = "True" ]; then BUILD_TYPE=debug @@ -582,10 +662,18 @@ for ARG in $ARGS; do else echo "Tried to add $COMPILER twice" fi - fi + fi done done +if [ "$COMPILERS_TO_TEST" == "" ]; then + echo "-----------------------------------------------" + echo " !!!! Invalid Compiler provided '$ARGS' !!!!" + echo "-----------------------------------------------" + print_help + exit 0 +fi + # # Functions. # @@ -681,6 +769,10 @@ setup_env() { module list 2>&1 | grep "$mod" >& /dev/null || return 1 done + if [ -e ${CM_ALL_SCRIPT_PATH}/update_lib.sh ]; then + echo calling ${CM_ALL_SCRIPT_PATH}/update_lib.sh $MACHINE + source ${CM_ALL_SCRIPT_PATH}/update_lib.sh $MACHINE + fi return 0 } @@ -692,6 +784,13 @@ single_build_and_test() { # Set up env. mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type" cd $ROOT_DIR/$compiler/"${build}-$build_type" + + local compiler_modules_list=$(get_compiler_modules $compiler) + echo " # Load modules:" &> reload_modules.sh + echo " module load $compiler_modules_list" &>> reload_modules.sh + echo "" &>> reload_modules.sh + chmod +x reload_modules.sh + local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g') setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } @@ -699,8 +798,14 @@ single_build_and_test() { local compiler_warning_flags=$(get_compiler_warning_flags $compiler) local compiler_exe=$(get_compiler_exe_name $compiler) + if [[ "$KOKKOS_OPTIONS" == "" ]]; then + local extra_args="--with-options=enable_large_mem_tests" + else + local extra_args= + fi + if [[ "$build_type" = hwloc* ]]; then - local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) + local extra_args="$extra_args --with-hwloc=$(dirname $(dirname $(which hwloc-info)))" fi if [[ "$OPT_FLAG" = "" ]]; then @@ -710,25 +815,15 @@ single_build_and_test() { if [[ "$build_type" = *debug* ]]; then local extra_args="$extra_args --debug" local cxxflags="-g $compiler_warning_flags" - local ldflags="-g" else local cxxflags="$OPT_FLAG $compiler_warning_flags" - local ldflags="${OPT_FLAG}" fi local cxxflags="${cxxflags} ${CXX_FLAGS_EXTRA}" - local ldflags="${ldflags} ${LD_FLAGS_EXTRA}" + local ldflags="${LD_FLAGS_EXTRA}" local cxx_standard="${CXX_STANDARD}" - if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then - local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS" - fi - if [[ "$KOKKOS_OPTIONS" != "" ]]; then - local extra_args="$extra_args $KOKKOS_OPTIONS" - else - local extra_args="$extra_args --with-options=enable_large_mem_tests" - fi echo " Starting job $desc" @@ -742,14 +837,25 @@ single_build_and_test() { run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; } fi else - run_cmd ${KOKKOS_PATH}/scripts/testing_scripts/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + LOCAL_KOKKOS_DEVICES=${build//_/,} + if [[ "$LOCAL_KOKKOS_DEVICES" = *Cuda* ]]; then + CUDA_ENABLE_CMD="--with-cuda=$CUDA_ROOT" + fi + echo "kokkos options: ${KOKKOS_OPTIONS}" + echo "kokkos devices: ${LOCAL_KOKKOS_DEVICES}" + echo "kokkos cxx: ${cxxflags}" + + echo " # Use generate_makefile line below to call cmake which generates makefile for this build:" &> call_generate_makefile.sh + echo " ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} --no-examples $extra_args" &>> call_generate_makefile.sh + + run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$LOCAL_KOKKOS_DEVICES $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --cxxstandard=\"$cxx_standard\" --ldflags=\"$ldflags\" $CUDA_ENABLE_CMD --kokkos-path=${KOKKOS_PATH} --no-examples $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } local -i build_start_time=$(date +%s) - run_cmd make -j 48 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } + run_cmd make -j 48 all >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } local -i build_end_time=$(date +%s) comment="build_time=$(($build_end_time-$build_start_time))" if [[ "$BUILD_ONLY" == False ]]; then - run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; } + run_cmd ctest -V --output-on-failure >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; } local -i run_end_time=$(date +%s) comment="$comment run_time=$(($run_end_time-$build_end_time))" fi @@ -854,6 +960,32 @@ wait_summarize_and_exit() { do echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)" rv=$rv+1 + + local str=$failed_test + local comp=$(echo "$str" | cut -d- -f1) + local vers=$(echo "$str" | cut -d- -f2) + local lbuild=$(echo "$str" | cut -d- -f3-) + # Generate reproducer instructions + #local filename=reproducer_instructions-$comp-$vers-$lbuild + local faildir=$ROOT_DIR/$comp/$vers/$lbuild + # Output reproducer instructions + echo "#######################################################" + echo " # Reproducer instructions:" + cat $faildir/reload_modules.sh + cat $faildir/call_generate_makefile.sh + echo "" + echo " # To reload modules, reconfigure, rebuild, and retest directly from this failing build do the following:" + echo " # Move to the build directory" + echo " cd $faildir" + echo " # To reload modules" + echo " source ./reload_modules.sh" + echo " # To reconfigure" + echo " ./call_generate_makefile.sh" + echo " # To rebuild" + echo " make -j" + echo " # To retest" + echo " ctest -V" + echo "#######################################################" done fi @@ -864,6 +996,10 @@ wait_summarize_and_exit() { # Main. # +CM_ALL_SCRIPT=$0 +CM_ALL_SCRIPT_PATH=`pwd` +CM_ALL_SCRIPT_PATH=${CM_ALL_SCRIPT_PATH}/`dirname $CM_ALL_SCRIPT` + ROOT_DIR=$(get_test_root_dir) mkdir -p $ROOT_DIR cd $ROOT_DIR diff --git a/scripts/update-version b/scripts/update-version new file mode 100755 index 00000000000..3cc10b33112 --- /dev/null +++ b/scripts/update-version @@ -0,0 +1,35 @@ + +NEW_MAJOR=$1 +NEW_MINOR=$2 +NEW_PATCH=$3 + +GNU_MAJOR=`grep "KOKKOS_VERSION_MAJOR =" Makefile.kokkos | awk '{print $3}'` +CMAKE_MAJOR=`grep "Kokkos_VERSION_MAJOR " CMakeLists.txt | awk '{print $2}' | sed 's|)||'` + +GNU_MINOR=`grep "KOKKOS_VERSION_MINOR =" Makefile.kokkos | awk '{print $3}'` +CMAKE_MINOR=`grep "Kokkos_VERSION_MINOR " CMakeLists.txt | awk '{print $2}' | sed 's|)||'` + +GNU_PATCH=`grep "KOKKOS_VERSION_PATCH =" Makefile.kokkos | awk '{print $3}'` +CMAKE_PATCH=`grep "Kokkos_VERSION_PATCH " CMakeLists.txt | awk '{print $2}' | sed 's|)||'` + +if [ $GNU_MAJOR -ne $CMAKE_MAJOR ] +then + echo 'MAJOR Versions do not agree GNU' $GNU_MAJOR 'vs CMAKE' $CMAKE_MAJOR +fi +if [ $GNU_MINOR -ne $CMAKE_MINOR ] +then + echo 'MINOR Versions do not agree GNU' $GNU_MINOR 'vs CMAKE' $CMAKE_MINOR +fi +if [ $GNU_PATCH -ne $CMAKE_PATCH ] +then + echo 'PATCH Versions do not agree GNU' $GNU_PATCH 'vs CMAKE' $CMAKE_PATCH +fi + +sed -i.bak 's|KOKKOS_VERSION_MAJOR = '$GNU_MAJOR'|KOKKOS_VERSION_MAJOR = '$NEW_MAJOR'|g' Makefile.kokkos +sed -i.bak 's|KOKKOS_VERSION_MINOR = '$GNU_MINOR'|KOKKOS_VERSION_MINOR = '$NEW_MINOR'|g' Makefile.kokkos +sed -i.bak 's|KOKKOS_VERSION_PATCH = '$GNU_PATCH'|KOKKOS_VERSION_PATCH = '$NEW_PATCH'|g' Makefile.kokkos +rm Makefile.kokkos.bak +sed -i.bak 's|Kokkos_VERSION_MAJOR '$GNU_MAJOR')|Kokkos_VERSION_MAJOR '$NEW_MAJOR')|g' CMakeLists.txt +sed -i.bak 's|Kokkos_VERSION_MINOR '$GNU_MINOR')|Kokkos_VERSION_MINOR '$NEW_MINOR')|g' CMakeLists.txt +sed -i.bak 's|Kokkos_VERSION_PATCH '$GNU_PATCH')|Kokkos_VERSION_PATCH '$NEW_PATCH')|g' CMakeLists.txt +rm CMakeLists.txt.bak diff --git a/tpls/gtest/gtest/gtest-all.cc b/tpls/gtest/gtest/gtest-all.cc index 735f581c95b..7c544a382f2 100644 --- a/tpls/gtest/gtest/gtest-all.cc +++ b/tpls/gtest/gtest/gtest-all.cc @@ -912,9 +912,9 @@ class GTEST_API_ UnitTestImpl { virtual ~UnitTestImpl(); // There are two different ways to register your own TestPartResultReporter. - // You can register your own repoter to listen either only for test results + // You can register your own reporter to listen either only for test results // from the current thread or for results from all threads. - // By default, each per-thread test result repoter just passes a new + // By default, each per-thread test result reporter just passes a new // TestPartResult to the global test result reporter, which registers the // test part result for the currently running test. diff --git a/tpls/gtest/gtest/gtest.h b/tpls/gtest/gtest/gtest.h index 8f98f330ed8..f39d0b87c90 100644 --- a/tpls/gtest/gtest/gtest.h +++ b/tpls/gtest/gtest/gtest.h @@ -2192,7 +2192,7 @@ inline void FlushInfoLog() { fflush(NULL); } // const Foo*). When you use ImplicitCast_, the compiler checks that // the cast is safe. Such explicit ImplicitCast_s are necessary in // surprisingly many situations where C++ demands an exact type match -// instead of an argument type convertable to a target type. +// instead of an argument type convertible to a target type. // // The syntax for using ImplicitCast_ is the same as for static_cast: // @@ -9945,7 +9945,7 @@ void UniversalTersePrint(const T& value, ::std::ostream* os) { // NUL-terminated string. template void UniversalPrint(const T& value, ::std::ostream* os) { - // A workarond for the bug in VC++ 7.1 that prevents us from instantiating + // A workaround for the bug in VC++ 7.1 that prevents us from instantiating // UniversalPrinter with T directly. typedef T T1; UniversalPrinter::Print(value, os); @@ -18672,7 +18672,7 @@ class GTEST_API_ UnitTest { internal::UnitTestImpl* impl() { return impl_; } const internal::UnitTestImpl* impl() const { return impl_; } - // These classes and funcions are friends as they need to access private + // These classes and functions are friends as they need to access private // members of UnitTest. friend class Test; friend class internal::AssertHelper;