From c99179079aee1d1b793ecf07530252046e2e40d6 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 5 Sep 2024 16:19:19 -0600 Subject: [PATCH 01/12] HOMME: add aurora, polaris, spot machine files --- .../homme/cmake/machineFiles/aurora-aot.cmake | 64 ++++++++++++++++ .../homme/cmake/machineFiles/aurora-jit.cmake | 58 +++++++++++++++ .../cmake/machineFiles/chrysalis-bfb.cmake | 2 + .../homme/cmake/machineFiles/chrysalis.cmake | 2 + .../homme/cmake/machineFiles/polaris-a100.sh | 74 +++++++++++++++++++ .../cmake/machineFiles/spot-aot-AB2.cmake | 63 ++++++++++++++++ 6 files changed, 263 insertions(+) create mode 100644 components/homme/cmake/machineFiles/aurora-aot.cmake create mode 100644 components/homme/cmake/machineFiles/aurora-jit.cmake create mode 100644 components/homme/cmake/machineFiles/polaris-a100.sh create mode 100644 components/homme/cmake/machineFiles/spot-aot-AB2.cmake diff --git a/components/homme/cmake/machineFiles/aurora-aot.cmake b/components/homme/cmake/machineFiles/aurora-aot.cmake new file mode 100644 index 000000000000..b6fe34a78d72 --- /dev/null +++ b/components/homme/cmake/machineFiles/aurora-aot.cmake @@ -0,0 +1,64 @@ +#module restore +#module load oneapi/eng-compiler/2022.12.30.005 +#module load intel_compute_runtime/release/agama-devel-627 +#module load spack cmake +#module list + + +SET (SUNSPOT_MACHINE TRUE CACHE BOOL "") + +SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "") +SET(HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "") + +SET(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "") + +SET(WITH_PNETCDF FALSE CACHE FILEPATH "") + +SET(USE_QUEUING FALSE CACHE BOOL "") + +#temp hack +SET(HOMME_USE_KOKKOS TRUE CACHE BOOL "") + +SET(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "") +SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "") + +#set(KOKKOS_HOME "/home/onguba/kokkos-build/mar05-aot/install" CACHE STRING "") +#set(E3SM_KOKKOS_PATH ${KOKKOS_HOME} CACHE STRING "") + +SET(USE_TRILINOS OFF CACHE BOOL "") + +SET(SYCL_BUILD TRUE CACHE BOOL "") +SET(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "") + +SET(CMAKE_CXX_STANDARD 17) + +SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "") +SET(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "") +SET(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "") + +# -fsycl-link-huge-device-code for theta to get build +#JIT flags +#SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") +#SET(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64") + +#AOT flags +SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") +SET(SYCL_LINK_FLAGS "-fsycl -fsycl-device-code-split=per_kernel -fsycl-link-huge-device-code -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"") + +SET(ADD_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "") +SET(ADD_C_FLAGS "-O3 -DNDEBUG " CACHE STRING "") + +SET(ADD_CXX_FLAGS "-std=c++17 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "") +SET(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "") + +set (ENABLE_OPENMP OFF CACHE BOOL "") +set (ENABLE_COLUMN_OPENMP OFF CACHE BOOL "") +set (ENABLE_HORIZ_OPENMP OFF CACHE BOOL "") + +set (HOMME_TESTING_PROFILE "dev" CACHE STRING "") + +set (USE_NUM_PROCS 4 CACHE STRING "") + +SET (USE_MPI_OPTIONS "--bind-to core" CACHE FILEPATH "") + + diff --git a/components/homme/cmake/machineFiles/aurora-jit.cmake b/components/homme/cmake/machineFiles/aurora-jit.cmake new file mode 100644 index 000000000000..1941fa9eb3f3 --- /dev/null +++ b/components/homme/cmake/machineFiles/aurora-jit.cmake @@ -0,0 +1,58 @@ +#module restore +#module load oneapi/eng-compiler/2022.12.30.005 +#module load intel_compute_runtime/release/agama-devel-627 +#module load spack cmake +#module list + + + +SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "") +SET(HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "") + +SET(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "") + +SET(WITH_PNETCDF FALSE CACHE FILEPATH "") + +SET(USE_QUEUING FALSE CACHE BOOL "") + +#temp hack +SET(HOMME_USE_KOKKOS TRUE CACHE BOOL "") + +SET(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "") +SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "") + +#set(KOKKOS_HOME "/home/onguba/kokkos-build/jan03-2024/install" CACHE STRING "") +#set(E3SM_KOKKOS_PATH ${KOKKOS_HOME} CACHE STRING "") + +SET(USE_TRILINOS OFF CACHE BOOL "") + +SET(SYCL_BUILD TRUE CACHE BOOL "") +SET(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "") + +SET(CMAKE_CXX_STANDARD 17) + +SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "") +SET(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "") +SET(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "") + +# -fsycl-link-huge-device-code for theta to get build +SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") +SET(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64") + +SET(ADD_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "") +SET(ADD_C_FLAGS "-O3 -DNDEBUG " CACHE STRING "") + +SET(ADD_CXX_FLAGS "-std=c++17 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "") +SET(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "") + +set (ENABLE_OPENMP OFF CACHE BOOL "") +set (ENABLE_COLUMN_OPENMP OFF CACHE BOOL "") +set (ENABLE_HORIZ_OPENMP OFF CACHE BOOL "") + +set (HOMME_TESTING_PROFILE "dev" CACHE STRING "") + +set (USE_NUM_PROCS 4 CACHE STRING "") + +SET (USE_MPI_OPTIONS "--bind-to core" CACHE FILEPATH "") + + diff --git a/components/homme/cmake/machineFiles/chrysalis-bfb.cmake b/components/homme/cmake/machineFiles/chrysalis-bfb.cmake index b9f0d41a0606..fa1f1ac545c5 100644 --- a/components/homme/cmake/machineFiles/chrysalis-bfb.cmake +++ b/components/homme/cmake/machineFiles/chrysalis-bfb.cmake @@ -17,6 +17,8 @@ ENDIF() SET (USE_MPIEXEC "srun" CACHE STRING "") SET (USE_MPI_OPTIONS "-K --cpu_bind=cores" CACHE STRING "") +SET (CHRYSALIS_MACHINE TRUE CACHE BOOL "") + # Set kokkos arch, to get correct avx flags SET (Kokkos_ARCH_ZEN2 ON CACHE BOOL "") diff --git a/components/homme/cmake/machineFiles/chrysalis.cmake b/components/homme/cmake/machineFiles/chrysalis.cmake index 68ff76ec8082..97bc682c9546 100644 --- a/components/homme/cmake/machineFiles/chrysalis.cmake +++ b/components/homme/cmake/machineFiles/chrysalis.cmake @@ -17,6 +17,8 @@ ENDIF() SET (USE_MPIEXEC "srun" CACHE STRING "") SET (USE_MPI_OPTIONS "-K --cpu_bind=cores" CACHE STRING "") +SET (CHRYSALIS_MACHINE TRUE CACHE BOOL "") + # Set kokkos arch, to get correct avx flags SET (Kokkos_ARCH_ZEN2 ON CACHE BOOL "") diff --git a/components/homme/cmake/machineFiles/polaris-a100.sh b/components/homme/cmake/machineFiles/polaris-a100.sh new file mode 100644 index 000000000000..2b63c61a55e7 --- /dev/null +++ b/components/homme/cmake/machineFiles/polaris-a100.sh @@ -0,0 +1,74 @@ +#Currently Loaded Modules: +# 1) craype-x86-rome 6) craype/2.7.15 11) cray-libpals/1.1.7 16) nvhpc-mixed/21.9 +# 2) libfabric/1.11.0.4.125 7) cray-dsmml/0.2.2 12) PrgEnv-gnu/8.3.3 17) cudatoolkit-standalone/11.6.2 +# 3) craype-network-ofi 8) cray-pmi/6.1.2 13) gnu-parallel/2021-09-22 18) cmake/3.23.2 +# 4) perftools-base/22.05.0 9) cray-pmi-lib/6.0.17 14) gcc/11.2.0 +# 5) craype-accel-nvidia80 10) cray-pals/1.1.7 15) cray-mpich/8.1.16 + + + +#SET(HOMMEXX_EXEC_SPACE CUDA CACHE STRING "") +#SET(HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "") +#SET(HOMMEXX_CUDA_MAX_WARP_PER_TEAM "16" CACHE STRING "") + +# cray-hdf5-parallel/1.12.0.6 cray-netcdf-hdf5parallel/4.7.4.6 cray-parallel-netcdf/1.12.1.6 +#SET(NETCDF_DIR $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} CACHE FILEPATH "") +#SET(PNETCDF_DIR $ENV{CRAY_PARALLEL_NETCDF_DIR} CACHE FILEPATH "") +#SET(HDF5_DIR $ENV{CRAY_HDF5_PARALLEL_PREFIX} CACHE FILEPATH "") + +#for scorpio +#SET (NetCDF_C_PATH $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} CACHE FILEPATH "") +#SET (NetCDF_Fortran_PATH $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} CACHE FILEPATH "") + +SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "") + +SET(HOMME_FIND_BLASLAPACK FALSE CACHE BOOL "") + +SET(WITH_PNETCDF FALSE CACHE FILEPATH "") + +SET(USE_QUEUING FALSE CACHE BOOL "") + +SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "") + +SET(CUDA_BUILD TRUE CACHE BOOL "") + +#SET(HOMMEXX_BFB_TESTING TRUE CACHE BOOL "") + +SET(USE_TRILINOS OFF CACHE BOOL "") + +SET(Kokkos_ENABLE_OPENMP OFF CACHE BOOL "") +SET(Kokkos_ENABLE_CUDA ON CACHE BOOL "") +SET(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "") +SET(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "") +#SET(Kokkos_ARCH_ZEN2 ON CACHE BOOL "") # works, and perf same if both AMPERE80 and ZEN2 are on +#SET(Kokkos_ENABLE_CUDA_UVM ON CACHE BOOL "") +SET(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "") +#SET(Kokkos_ENABLE_CUDA_ARCH_LINKING OFF CACHE BOOL "") + +#SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "") +#SET(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "") +#SET(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "") +SET(CMAKE_C_COMPILER "cc" CACHE STRING "") +SET(CMAKE_Fortran_COMPILER "ftn" CACHE STRING "") +SET(CMAKE_CXX_COMPILER "CC" CACHE STRING "") + +#SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "") +#SET(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "") +#SET(CMAKE_CXX_COMPILER "${CMAKE_CURRENT_SOURCE_DIR}/../../externals/kokkos/bin/nvcc_wrapper" CACHE STRING "") + +# Note: need to set MPICH_CXX env variable and perhaps NVCC_WRAPPER_DEFAULT_COMPILER + +SET(CXXLIB_SUPPORTED_CACHE FALSE CACHE BOOL "") + +SET(ENABLE_OPENMP OFF CACHE BOOL "") +SET(ENABLE_COLUMN_OPENMP OFF CACHE BOOL "") +SET(ENABLE_HORIZ_OPENMP OFF CACHE BOOL "") + +SET(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "") + +#SET(HOMME_TESTING_PROFILE "dev" CACHE STRING "") + +SET(USE_NUM_PROCS 4 CACHE STRING "") + +SET(USE_MPIEXEC "srun" CACHE STRING "") +#SET(CPRNC_DIR /global/cfs/cdirs/e3sm/tools/cprnc CACHE FILEPATH "") diff --git a/components/homme/cmake/machineFiles/spot-aot-AB2.cmake b/components/homme/cmake/machineFiles/spot-aot-AB2.cmake new file mode 100644 index 000000000000..23fad2361ccf --- /dev/null +++ b/components/homme/cmake/machineFiles/spot-aot-AB2.cmake @@ -0,0 +1,63 @@ +#module restore +#module load oneapi/eng-compiler/2022.12.30.005 +#module load intel_compute_runtime/release/agama-devel-627 +#module load spack cmake +#module list + +SET (SUNSPOT_MACHINE TRUE CACHE BOOL "") + +SET (HOMMEXX_MPI_ON_DEVICE TRUE CACHE BOOL "") + +#SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "") + +SET(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "") + +SET(WITH_PNETCDF FALSE CACHE FILEPATH "") + +SET(USE_QUEUING FALSE CACHE BOOL "") + +#temp hack +SET(HOMME_USE_KOKKOS TRUE CACHE BOOL "") + +SET(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "") +SET(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "") + +#set(KOKKOS_HOME "/home/onguba/kokkos-build/june22-2024-aot/install" CACHE STRING "") +#set(E3SM_KOKKOS_PATH ${KOKKOS_HOME} CACHE STRING "") + +SET (NetCDF_Fortran_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") +SET (NetCDF_C_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "") + +SET(USE_TRILINOS OFF CACHE BOOL "") + +SET(SYCL_BUILD TRUE CACHE BOOL "") +SET(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "") + +#SET(CMAKE_CXX_STANDARD 17) +SET(CMAKE_CXX_STANDARD 17 CACHE STRING "CXX Standard") + +SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "") +SET(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "") +SET(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "") + +SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") +SET(SYCL_LINK_FLAGS "-fsycl-max-parallel-link-jobs=32 -fsycl-link-huge-device-code -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"") + +#-fpscomp does not actually solve the issue with bools in here,another suggestion was -fp-model=precise, not working either +SET(ADD_Fortran_FLAGS " -fc=ifx -fpscomp logicals -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "") +SET(ADD_C_FLAGS "-O3 -DNDEBUG " CACHE STRING "") + +SET(ADD_CXX_FLAGS " -std=c++17 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "") +SET(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "") + +set (ENABLE_OPENMP OFF CACHE BOOL "") +set (ENABLE_COLUMN_OPENMP OFF CACHE BOOL "") +set (ENABLE_HORIZ_OPENMP OFF CACHE BOOL "") + +set (HOMME_TESTING_PROFILE "dev" CACHE STRING "") + +set (USE_NUM_PROCS 4 CACHE STRING "") + +SET (USE_MPI_OPTIONS "--bind-to core" CACHE FILEPATH "") + + From 1379db52a29ff31764b6268eae1f88fbedbae252 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 5 Sep 2024 16:29:27 -0600 Subject: [PATCH 02/12] HOMME: CMake and CPP mods to support SYCL backend --- components/homme/CMakeLists.txt | 8 +++++--- components/homme/cmake/HommeMacros.cmake | 8 +++++++- components/homme/src/share/cxx/Config.hpp | 2 +- components/homme/src/share/cxx/ExecSpaceDefs.hpp | 4 ++++ .../homme/test_execs/share_kokkos_ut/CMakeLists.txt | 4 ++-- .../homme/test_execs/thetal_kokkos_ut/CMakeLists.txt | 3 +++ 6 files changed, 22 insertions(+), 7 deletions(-) diff --git a/components/homme/CMakeLists.txt b/components/homme/CMakeLists.txt index 80a89a296910..6fe81180ab54 100644 --- a/components/homme/CMakeLists.txt +++ b/components/homme/CMakeLists.txt @@ -206,7 +206,9 @@ IF (HOMME_USE_KOKKOS) STRING (TOUPPER ${HOMMEXX_EXEC_SPACE} HOMMEXX_EXEC_SPACE_UPPER) - IF (HOMMEXX_EXEC_SPACE_UPPER STREQUAL "HIP") + IF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "SYCL") + SET (HOMMEXX_SYCL_SPACE ON) + ELSEIF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "HIP") SET (HOMMEXX_HIP_SPACE ON) ELSEIF (HOMMEXX_EXEC_SPACE_UPPER STREQUAL "CUDA") SET (HOMMEXX_CUDA_SPACE ON) @@ -303,7 +305,7 @@ SET (HOMMEXX_ENABLE_GPU_F90 FALSE) IF (HOMME_USE_KOKKOS) - IF (CUDA_BUILD OR HIP_BUILD) + IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD) SET (DEFAULT_VECTOR_SIZE 1) SET (HOMMEXX_ENABLE_GPU TRUE) SET (HOMMEXX_ENABLE_GPU_F90 TRUE) @@ -312,7 +314,7 @@ IF (HOMME_USE_KOKKOS) ENDIF() SET (HOMMEXX_VECTOR_SIZE ${DEFAULT_VECTOR_SIZE} CACHE STRING - "If AVX or Cuda or HIP don't take priority, use this software vector size.") + "If AVX or Cuda or HIP or SYCL don't take priority, use this software vector size.") IF (CMAKE_BUILD_TYPE_UPPER MATCHES "DEBUG" OR CMAKE_BUILD_TYPE_UPPER MATCHES "RELWITHDEBINFO") SET (HOMMEXX_DEBUG ON) diff --git a/components/homme/cmake/HommeMacros.cmake b/components/homme/cmake/HommeMacros.cmake index 6d073dbbe83b..5610947cb299 100644 --- a/components/homme/cmake/HommeMacros.cmake +++ b/components/homme/cmake/HommeMacros.cmake @@ -112,7 +112,13 @@ macro(createTestExec execName execType macroNP macroNC ADD_DEFINITIONS(-DHAVE_CONFIG_H) ADD_EXECUTABLE(${execName} ${EXEC_SOURCES}) - SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE Fortran) + + if(SUNSPOT_MACHINE) + SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE CXX) + else() + SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE Fortran) + endif() + IF(BUILD_HOMME_WITHOUT_PIOLIBRARY) TARGET_COMPILE_DEFINITIONS(${execName} PUBLIC HOMME_WITHOUT_PIOLIBRARY) ENDIF() diff --git a/components/homme/src/share/cxx/Config.hpp b/components/homme/src/share/cxx/Config.hpp index 684f9143beaf..b204b1dbd047 100644 --- a/components/homme/src/share/cxx/Config.hpp +++ b/components/homme/src/share/cxx/Config.hpp @@ -21,7 +21,7 @@ # endif #endif -#if ! defined HOMMEXX_CUDA_SPACE && ! defined HOMMEXX_OPENMP_SPACE && ! defined HOMMEXX_THREADS_SPACE && ! defined HOMMEXX_SERIAL_SPACE && ! defined HOMMEXX_HIP_SPACE +#if ! defined HOMMEXX_CUDA_SPACE && ! defined HOMMEXX_OPENMP_SPACE && ! defined HOMMEXX_THREADS_SPACE && ! defined HOMMEXX_SERIAL_SPACE && ! defined HOMMEXX_HIP_SPACE && ! defined HOMMEXX_SYCL_SPACE # define HOMMEXX_DEFAULT_SPACE #endif diff --git a/components/homme/src/share/cxx/ExecSpaceDefs.hpp b/components/homme/src/share/cxx/ExecSpaceDefs.hpp index cd6649c7ab2d..82f5e803801c 100644 --- a/components/homme/src/share/cxx/ExecSpaceDefs.hpp +++ b/components/homme/src/share/cxx/ExecSpaceDefs.hpp @@ -34,6 +34,10 @@ using HommexxGPU = Kokkos::Cuda; using HommexxGPU = Kokkos::Experimental::HIP; #endif +#ifdef KOKKOS_ENABLE_SYCL +using HommexxGPU = Kokkos::Experimental::SYCL; +#endif + #else using HommexxGPU = void; #endif diff --git a/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt b/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt index 3fbeff9f6f2c..bc788462ce6e 100644 --- a/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt +++ b/components/homme/test_execs/share_kokkos_ut/CMakeLists.txt @@ -10,7 +10,7 @@ SET(UTILS_TIMING_DIRS ${UTILS_TIMING_SRC_DIR} ${UTILS_TIMING_BIN_DIR}) # Note: need CUDA_BUILD and HOMMEXX_BFB_TESTING here, since the share # unit tests do not include a config.h file SET (COMMON_DEFINITIONS NP=4 NC=4) -IF (CUDA_BUILD OR HIP_BUILD) +IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD) SET(COMMON_DEFINITIONS ${COMMON_DEFINITIONS} HOMMEXX_ENABLE_GPU_F90) ENDIF() IF (HOMMEXX_BFB_TESTING) @@ -158,7 +158,7 @@ ELSE() SET (NUM_CPUS 1) ENDIF() cxx_unit_test (sphere_op_ut "${SPHERE_OP_UT_F90_SRCS}" "${SPHERE_OP_UT_CXX_SRCS}" "${SPHERE_OP_UT_INCLUDE_DIRS}" "${CONFIG_DEFINES}" ${NUM_CPUS}) -endif () +endif () #BFB ### Limiters unit test ### diff --git a/components/homme/test_execs/thetal_kokkos_ut/CMakeLists.txt b/components/homme/test_execs/thetal_kokkos_ut/CMakeLists.txt index 205635e918cc..e8bf5e20bd03 100644 --- a/components/homme/test_execs/thetal_kokkos_ut/CMakeLists.txt +++ b/components/homme/test_execs/thetal_kokkos_ut/CMakeLists.txt @@ -11,6 +11,8 @@ SET(UTILS_TIMING_BIN_DIR ${HOMME_BINARY_DIR}/utils/cime/CIME/non_py/src/timing) THETAL_KOKKOS_SETUP() # This is needed to compile the lib and test executables with the correct options +#these vars shared between all targets, so changing one var +#for one test only won't work, config is built once and for the last test SET(THIS_CONFIG_IN ${HOMME_SOURCE_DIR}/src/theta-l_kokkos/config.h.cmake.in) SET(THIS_CONFIG_HC ${CMAKE_CURRENT_BINARY_DIR}/config.h.c) SET(THIS_CONFIG_H ${CMAKE_CURRENT_BINARY_DIR}/config.h) @@ -18,6 +20,7 @@ SET (NUM_POINTS 4) SET (NUM_PLEV 12) SET (QSIZE_D 4) SET (PIO_INTERP TRUE) + HommeConfigFile (${THIS_CONFIG_IN} ${THIS_CONFIG_HC} ${THIS_CONFIG_H} ) ADD_LIBRARY(thetal_kokkos_ut_lib From c7320a6c5e31c161cf8c48ecb72d0ad792506ddf Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 5 Sep 2024 16:30:22 -0600 Subject: [PATCH 03/12] HOMME: replace some printf with Kokkos::printf --- components/homme/src/share/cxx/utilities/BfbUtils.hpp | 2 +- .../homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp | 4 ++-- .../homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/components/homme/src/share/cxx/utilities/BfbUtils.hpp b/components/homme/src/share/cxx/utilities/BfbUtils.hpp index e3570874e266..7fb4d042f7f2 100644 --- a/components/homme/src/share/cxx/utilities/BfbUtils.hpp +++ b/components/homme/src/share/cxx/utilities/BfbUtils.hpp @@ -64,7 +64,7 @@ KOKKOS_INLINE_FUNCTION ScalarType int_pow (ScalarType val, int k) { constexpr int max_shift = 30; if (k<0) { - printf ("k = %d\n",k); + Kokkos::printf ("k = %d\n",k); Kokkos::abort("int_pow implemented only for k>=0.\n"); } diff --git a/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp index ace1ba920141..d16769079729 100644 --- a/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/DirkFunctorImpl.hpp @@ -382,8 +382,8 @@ struct DirkFunctorImpl { kv.team_barrier(); if (it >= maxiter) { - printf("[DIRK] WARNING! Newton reached max iteration count," - " with deltaerr = %3.17f\n", deltaerr); + Kokkos::printf("[DIRK] WARNING! Newton reached max iteration count," + " with deltaerr = %3.17f\n", deltaerr); nerr = 1; } diff --git a/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp b/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp index cd3bf7c32526..7914c0a60e3a 100644 --- a/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/LimiterFunctor.hpp @@ -141,8 +141,8 @@ struct LimiterFunctor { [&](const int k,Real& result) { #ifndef HOMMEXX_BFB_TESTING if(diff_as_real(k) < 0){ - printf("WARNING:CAAR: dp3d too small. k=%d, dp3d(k)=%f, dp0=%f \n", - k+1,dp_as_real(k),dp0_as_real(k)); + Kokkos::printf("WARNING:CAAR: dp3d too small. k=%d, dp3d(k)=%f, dp0=%f \n", + k+1,dp_as_real(k),dp0_as_real(k)); } #endif result = result<=diff_as_real(k) ? result : diff_as_real(k); @@ -202,8 +202,8 @@ struct LimiterFunctor { for (int ivec=0; ivec Date: Thu, 5 Sep 2024 16:33:09 -0600 Subject: [PATCH 04/12] HOMME: prefer to use int to bool in a few places --- components/homme/src/share/control_mod.F90 | 1 + components/homme/src/share/cxx/GllFvRemap.cpp | 4 +- components/homme/src/share/cxx/GllFvRemap.hpp | 4 +- .../homme/src/share/cxx/GllFvRemapImpl.cpp | 8 ++-- .../homme/src/share/cxx/GllFvRemapImpl.hpp | 5 ++- .../homme/src/share/cxx/SimulationParams.hpp | 4 +- components/homme/src/share/gllfvremap_mod.F90 | 14 +++---- components/homme/src/share/namelist_mod.F90 | 6 +++ .../src/theta-l_kokkos/cxx/CamForcing.cpp | 2 +- .../theta-l_kokkos/cxx/EquationOfState.hpp | 4 +- .../src/theta-l_kokkos/cxx/ForcingFunctor.hpp | 4 +- .../cxx/HyperviscosityFunctorImpl.cpp | 8 +++- .../cxx/HyperviscosityFunctorImpl.hpp | 2 +- .../cxx/cxx_f90_interface_theta.cpp | 19 +++++----- .../src/theta-l_kokkos/prim_driver_mod.F90 | 38 ++++++++++++------- .../src/theta-l_kokkos/theta_f2c_mod.F90 | 12 +++--- 16 files changed, 79 insertions(+), 56 deletions(-) diff --git a/components/homme/src/share/control_mod.F90 b/components/homme/src/share/control_mod.F90 index 0e9494f5a6cd..9c3c599b2324 100644 --- a/components/homme/src/share/control_mod.F90 +++ b/components/homme/src/share/control_mod.F90 @@ -43,6 +43,7 @@ module control_mod ! flag used by preqx, theta-l and theta-c models ! should be renamed to "hydrostatic_mode" logical, public :: theta_hydrostatic_mode + integer, public :: theta_hydrostatic_mode_integer integer, public :: tstep_type= 5 ! preqx timestepping options diff --git a/components/homme/src/share/cxx/GllFvRemap.cpp b/components/homme/src/share/cxx/GllFvRemap.cpp index e36dbc14d74f..a8f564958d46 100644 --- a/components/homme/src/share/cxx/GllFvRemap.cpp +++ b/components/homme/src/share/cxx/GllFvRemap.cpp @@ -16,7 +16,7 @@ namespace Homme { void init_gllfvremap_c (int nelemd, int np, int nf, int nf_max, - bool theta_hydrostatic_mode, + int theta_hydrostatic_mode, CF90Ptr fv_metdet, CF90Ptr g2f_remapd, CF90Ptr f2g_remapd, CF90Ptr D_f, CF90Ptr Dinv_f) { auto& c = Context::singleton(); @@ -52,7 +52,7 @@ void GllFvRemap::init_boundary_exchanges () { } void GllFvRemap -::init_data (const int nf, const int nf_max, bool theta_hydrostatic_mode, +::init_data (const int nf, const int nf_max, const int theta_hydrostatic_mode, const Real* fv_metdet, const Real* g2f_remapd, const Real* f2g_remapd, const Real* D_f, const Real* Dinv_f) { m_impl->init_data(nf, nf_max, theta_hydrostatic_mode, fv_metdet, diff --git a/components/homme/src/share/cxx/GllFvRemap.hpp b/components/homme/src/share/cxx/GllFvRemap.hpp index 07e4bf58a903..2adff0aeaa96 100644 --- a/components/homme/src/share/cxx/GllFvRemap.hpp +++ b/components/homme/src/share/cxx/GllFvRemap.hpp @@ -40,7 +40,7 @@ class GllFvRemap { typedef Phys2T::const_type CPhys2T; typedef Phys3T::const_type CPhys3T; - void init_data(const int nf, const int nf_max, bool theta_hydrostatic_mode, + void init_data(const int nf, const int nf_max, const int theta_hydrostatic_mode, const Real* fv_metdet, const Real* g2f_remapd, const Real* f2g_remapd, const Real* D_f, const Real* Dinv_f); @@ -81,7 +81,7 @@ class GllFvRemap { extern "C" void init_gllfvremap_c(int nelemd, int np, int nf, int nf_max, - const bool theta_hydrostatic_mode, + const int theta_hydrostatic_mode, CF90Ptr fv_metdet, CF90Ptr g2f_remapd, CF90Ptr f2g_remapd, CF90Ptr D_f, CF90Ptr Dinv_f); diff --git a/components/homme/src/share/cxx/GllFvRemapImpl.cpp b/components/homme/src/share/cxx/GllFvRemapImpl.cpp index 6148f69cfa9c..d4ab5c89f510 100644 --- a/components/homme/src/share/cxx/GllFvRemapImpl.cpp +++ b/components/homme/src/share/cxx/GllFvRemapImpl.cpp @@ -131,7 +131,7 @@ void GllFvRemapImpl::init_boundary_exchanges () { template using FV = Kokkos::View; void GllFvRemapImpl -::init_data (const int nf, const int nf_max, const bool theta_hydrostatic_mode, +::init_data (const int nf, const int nf_max, const int theta_hydrostatic_mode, const Real* fv_metdet_r, const Real* g2f_remapd_r, const Real* f2g_remapd_r, const Real* D_f_r, const Real* Dinv_f_r) { using Kokkos::create_mirror_view; @@ -142,7 +142,7 @@ ::init_data (const int nf, const int nf_max, const bool theta_hydrostatic_mode, " nf must be > 1.", Errors::err_not_implemented); auto& sp = Context::singleton().get(); - m_data.use_moisture = sp.moisture == MoistDry::MOIST; + m_data.use_moisture = sp.use_moisture; // Only in the unit test gllfvremap_ut does theta_hydrostatic_mode not already // == sp.theta_hydrostatic_mode. m_data.theta_hydrostatic_mode = sp.theta_hydrostatic_mode = theta_hydrostatic_mode; @@ -395,7 +395,7 @@ ::run_dyn_to_fv_phys (const int timeidx, const Phys1T& ps, const Phys1T& phis, c const auto hvcoord = m_hvcoord; const bool use_moisture = m_data.use_moisture; - const bool theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; + const int theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; const bool want_dp_fv_out = dp_fv_out_ptr != nullptr; VPhys2T dp_fv_out; @@ -605,7 +605,7 @@ run_fv_phys_to_dyn (const int timeidx, const CPhys2T& Ts, const CPhys3T& uvs, const auto fT = m_forcing.m_ft; const auto hvcoord = m_hvcoord; const auto dp3d = m_state.m_dp3d; - const bool theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; + const int theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; EquationOfState eos; eos.init(theta_hydrostatic_mode, hvcoord); ElementOps ops; ops.init(hvcoord); const auto tu_ne = m_tu_ne; diff --git a/components/homme/src/share/cxx/GllFvRemapImpl.hpp b/components/homme/src/share/cxx/GllFvRemapImpl.hpp index 11738b2bf455..7388fddb1231 100644 --- a/components/homme/src/share/cxx/GllFvRemapImpl.hpp +++ b/components/homme/src/share/cxx/GllFvRemapImpl.hpp @@ -60,7 +60,8 @@ struct GllFvRemapImpl { struct Data { int nelemd, qsize, nf2, n_dss_fld; - bool use_moisture, theta_hydrostatic_mode; + bool use_moisture; + int theta_hydrostatic_mode; static constexpr int nbuf1 = 2, nbuf2 = 1; Buf1 buf1[nbuf1]; @@ -107,7 +108,7 @@ struct GllFvRemapImpl { void init_buffers(const FunctorsBuffersManager& fbm); void init_boundary_exchanges(); - void init_data(const int nf, const int nf_max, const bool theta_hydrostatic_mode, + void init_data(const int nf, const int nf_max, const int theta_hydrostatic_mode, const Real* fv_metdet_r, const Real* g2f_remapd_r, const Real* f2g_remapd_r, const Real* D_f_r, const Real* Dinv_f_r); diff --git a/components/homme/src/share/cxx/SimulationParams.hpp b/components/homme/src/share/cxx/SimulationParams.hpp index b435911da2e6..4f36962b16c3 100644 --- a/components/homme/src/share/cxx/SimulationParams.hpp +++ b/components/homme/src/share/cxx/SimulationParams.hpp @@ -23,7 +23,7 @@ struct SimulationParams void print(std::ostream& out = std::cout); TimeStepType time_step_type; - MoistDry moisture; + bool use_moisture; RemapAlg remap_alg; TestCase test_case; ForcingAlg ftype = ForcingAlg::FORCING_OFF; @@ -77,7 +77,7 @@ inline void SimulationParams::print (std::ostream& out) { out << "\n************** CXX SimulationParams **********************\n\n"; out << " time_step_type: " << etoi(time_step_type) << "\n"; - out << " moisture: " << (moisture==MoistDry::DRY ? "dry" : "moist") << "\n"; + out << " use_moisture: " << (use_moisture ? "moist" : "dry") << "\n"; out << " remap_alg: " << etoi(remap_alg) << "\n"; out << " test case: " << etoi(test_case) << "\n"; out << " ftype: " << etoi(ftype) << "\n"; diff --git a/components/homme/src/share/gllfvremap_mod.F90 b/components/homme/src/share/gllfvremap_mod.F90 index e0e0fa6c4daa..e927f04aba06 100644 --- a/components/homme/src/share/gllfvremap_mod.F90 +++ b/components/homme/src/share/gllfvremap_mod.F90 @@ -265,22 +265,22 @@ end subroutine gfr_init subroutine gfr_init_hxx() bind(c) #if KOKKOS_TARGET - use control_mod, only: theta_hydrostatic_mode - use iso_c_binding, only: c_bool + use control_mod, only: theta_hydrostatic_mode_integer + use iso_c_binding, only: c_int interface - subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode, & + subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode_integer, & fv_metdet, g2f_remapd, f2g_remapd, D_f, Dinv_f) bind(c) - use iso_c_binding, only: c_bool, c_int, c_double + use iso_c_binding, only: c_int, c_double integer (c_int), value, intent(in) :: nelemd, np, nf, nf_max - logical (c_bool), value, intent(in) :: theta_hydrostatic_mode + integer (c_int), value, intent(in) :: theta_hydrostatic_mode_integer real (c_double), dimension(nf*nf,nelemd), intent(in) :: fv_metdet real (c_double), dimension(np,np,nf_max*nf_max), intent(in) :: g2f_remapd real (c_double), dimension(nf_max*nf_max,np,np), intent(in) :: f2g_remapd real (c_double), dimension(nf*nf,2,2,nelemd), intent(in) :: D_f, Dinv_f end subroutine init_gllfvremap_c end interface - logical (c_bool) :: thm - thm = theta_hydrostatic_mode + integer (c_int) :: thm + thm = theta_hydrostatic_mode_integer call init_gllfvremap_c(nelemd, np, gfr%nphys, nphys_max, thm, & gfr%fv_metdet, gfr%g2f_remapd, gfr%f2g_remapd, gfr%D_f, gfr%Dinv_f) #endif diff --git a/components/homme/src/share/namelist_mod.F90 b/components/homme/src/share/namelist_mod.F90 index 1d47090182ba..a3edaa07e235 100644 --- a/components/homme/src/share/namelist_mod.F90 +++ b/components/homme/src/share/namelist_mod.F90 @@ -41,6 +41,7 @@ module namelist_mod runtype, & integration, & ! integration method theta_hydrostatic_mode, & + theta_hydrostatic_mode_integer, & transport_alg , & ! SE Eulerian, classical SL, cell-integrated SL semi_lagrange_cdr_alg, & ! see control_mod for semi_lagrange_* descriptions semi_lagrange_cdr_check, & @@ -452,8 +453,10 @@ subroutine readnl(par) planar_slice = .false. theta_hydrostatic_mode = .true. ! for preqx, this must be .true. + theta_hydrostatic_mode_integer = 1 ! for preqx, this must be .true. #if ( defined MODEL_THETA_C || defined MODEL_THETA_L ) theta_hydrostatic_mode = .false. ! default NH + theta_hydrostatic_mode_integer = 0 ! default NH #endif @@ -850,7 +853,10 @@ subroutine readnl(par) call MPI_bcast(case_planar_bubble,1,MPIlogical_t,par%root,par%comm,ierr) #endif +if(theta_hydrostatic_mode) theta_hydrostatic_mode_integer = 1 +if(.not. theta_hydrostatic_mode) theta_hydrostatic_mode_integer = 0 call MPI_bcast(theta_hydrostatic_mode ,1,MPIlogical_t,par%root,par%comm,ierr) + call MPI_bcast(theta_hydrostatic_mode_integer ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(transport_alg ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(semi_lagrange_cdr_alg ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(semi_lagrange_cdr_check ,1,MPIlogical_t,par%root,par%comm,ierr) diff --git a/components/homme/src/theta-l_kokkos/cxx/CamForcing.cpp b/components/homme/src/theta-l_kokkos/cxx/CamForcing.cpp index 02b999db16e9..bd7cee3e7c0a 100644 --- a/components/homme/src/theta-l_kokkos/cxx/CamForcing.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/CamForcing.cpp @@ -33,7 +33,7 @@ static void apply_cam_forcing_tracers(const Real dt, ForcingFunctor& ff, if ( p.ftype == ForcingAlg::FORCING_2) adjustment = true; #endif - ff.tracers_forcing(dt, tl.n0, tl.n0_qdp, adjustment, p.moisture); + ff.tracers_forcing(dt, tl.n0, tl.n0_qdp, adjustment, p.use_moisture); GPTLstop("ApplyCAMForcing_tracers"); } diff --git a/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp b/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp index dd97720f1be2..a50a28d58f55 100644 --- a/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp @@ -23,7 +23,7 @@ class EquationOfState { EquationOfState () = default; - void init (const bool theta_hydrostatic_mode, + void init (const int theta_hydrostatic_mode, const HybridVCoord& hvcoord) { m_theta_hydrostatic_mode = theta_hydrostatic_mode; m_hvcoord = hvcoord; @@ -250,7 +250,7 @@ class EquationOfState { public: - bool m_theta_hydrostatic_mode; + int m_theta_hydrostatic_mode; HybridVCoord m_hvcoord; }; diff --git a/components/homme/src/theta-l_kokkos/cxx/ForcingFunctor.hpp b/components/homme/src/theta-l_kokkos/cxx/ForcingFunctor.hpp index 28a702c1d273..00fa1deef667 100644 --- a/components/homme/src/theta-l_kokkos/cxx/ForcingFunctor.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/ForcingFunctor.hpp @@ -236,7 +236,7 @@ class ForcingFunctor }); } - void tracers_forcing (const Real dt, const int np1, const int np1_qdp, const bool adjustment, const MoistDry moisture) { + void tracers_forcing (const Real dt, const int np1, const int np1_qdp, const bool adjustment, const bool use_moisture) { // The Functor needs to be fully setup to use this function assert (is_setup); @@ -245,7 +245,7 @@ class ForcingFunctor m_np1_qdp = np1_qdp; m_adjustment = adjustment; - m_moist = (moisture==MoistDry::MOIST); + m_moist = use_moisture; Kokkos::parallel_for("temperature, NH perturb press, FQps",m_policy_tracers_pre,*this); Kokkos::fence(); diff --git a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp index 046e6f9956d4..55792051d33b 100644 --- a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp @@ -118,9 +118,13 @@ void HyperviscosityFunctorImpl::init_params(const SimulationParams& params) m_eos.init(params.theta_hydrostatic_mode,m_hvcoord); #ifdef HOMMEXX_BFB_TESTING - m_process_nh_vars = true; + m_process_nh_vars = 1; #else - m_process_nh_vars = !params.theta_hydrostatic_mode; + if (params.theta_hydrostatic_mode){ + m_process_nh_vars = 0; + }else{ + m_process_nh_vars = 1; + } #endif } diff --git a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp index a55ecbb365f9..993d525422f5 100644 --- a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp @@ -397,7 +397,7 @@ class HyperviscosityFunctorImpl Buffers m_buffers; HybridVCoord m_hvcoord; - bool m_process_nh_vars; + int m_process_nh_vars; // Policies Kokkos::TeamPolicy m_policy_update_states; diff --git a/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp b/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp index ec4e2cbe6328..40c4ae64dc98 100644 --- a/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/cxx_f90_interface_theta.cpp @@ -43,12 +43,13 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option, const Real& nu, const Real& nu_p, const Real& nu_q, const Real& nu_s, const Real& nu_div, const Real& nu_top, const int& hypervis_order, const int& hypervis_subcycle, const int& hypervis_subcycle_tom, const double& hypervis_scaling, const double& dcmip16_mu, - const int& ftype, const int& theta_adv_form, const bool& prescribed_wind, const bool& moisture, const bool& disable_diagnostics, - const bool& use_cpstar, const int& transport_alg, const bool& theta_hydrostatic_mode, const char** test_case, + const int& ftype, const int& theta_adv_form, const int& prescribed_wind, const int& use_moisture, const int& disable_diagnostics, + const int& use_cpstar, const int& transport_alg, const int& theta_hydrostatic_mode, const char** test_case, const int& dt_remap_factor, const int& dt_tracer_factor, - const double& scale_factor, const double& laplacian_rigid_factor, const int& nsplit, const bool& pgrad_correction, + const double& scale_factor, const double& laplacian_rigid_factor, const int& nsplit, const int& pgrad_correction, const double& dp3d_thresh, const double& vtheta_thresh, const int& internal_diagnostics_level) { + // Check that the simulation options are supported. This helps us in the future, since we // are currently 'assuming' some option have/not have certain values. As we support for more // options in the C++ build, we will remove some checks @@ -111,16 +112,16 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option, params.hypervis_subcycle = hypervis_subcycle; params.hypervis_subcycle_tom = hypervis_subcycle_tom; params.hypervis_scaling = hypervis_scaling; - params.disable_diagnostics = disable_diagnostics; - params.moisture = (moisture ? MoistDry::MOIST : MoistDry::DRY); - params.use_cpstar = use_cpstar; + params.disable_diagnostics = (bool)disable_diagnostics; + params.use_moisture = (bool)use_moisture; + params.use_cpstar = (bool)use_cpstar; params.transport_alg = transport_alg; - params.theta_hydrostatic_mode = theta_hydrostatic_mode; + params.theta_hydrostatic_mode = (bool)theta_hydrostatic_mode; params.dcmip16_mu = dcmip16_mu; params.nsplit = nsplit; params.scale_factor = scale_factor; params.laplacian_rigid_factor = laplacian_rigid_factor; - params.pgrad_correction = pgrad_correction; + params.pgrad_correction = (bool)pgrad_correction; params.dp3d_thresh = dp3d_thresh; params.vtheta_thresh = vtheta_thresh; params.internal_diagnostics_level = internal_diagnostics_level; @@ -304,7 +305,7 @@ void init_elements_c (const int& num_elems) c.create_ref(e.m_forcing); } -void init_functors_c (const bool& allocate_buffer) +void init_functors_c (const int& allocate_buffer) { auto& c = Context::singleton(); diff --git a/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 b/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 index 96b42314453f..262ba19f4b7a 100644 --- a/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 +++ b/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 @@ -64,7 +64,7 @@ subroutine prim_init2(elem, hybrid, nets, nete, tl, hvcoord) end subroutine prim_init2 subroutine prim_create_c_data_structures (tl, hvcoord, mp) - use iso_c_binding, only : c_loc, c_ptr, c_bool, C_NULL_CHAR + use iso_c_binding, only : c_loc, c_ptr, C_NULL_CHAR use theta_f2c_mod, only : init_reference_element_c, init_simulation_params_c, & init_time_level_c, init_hvcoord_c, init_elements_c use time_mod, only : TimeLevel_t, nsplit @@ -73,7 +73,7 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp) nu, nu_p, nu_q, nu_s, nu_div, nu_top, vert_remap_q_alg, & hypervis_order, hypervis_subcycle, hypervis_subcycle_tom,& hypervis_scaling, & - ftype, prescribed_wind, moisture, disable_diagnostics, & + ftype, prescribed_wind, use_moisture, disable_diagnostics, & use_cpstar, transport_alg, theta_hydrostatic_mode, & dcmip16_mu, theta_advect_form, test_case, & MAX_STRING_LEN, dt_remap_factor, dt_tracer_factor, & @@ -93,6 +93,8 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp) type (c_ptr) :: hybrid_am_ptr, hybrid_ai_ptr, hybrid_bm_ptr, hybrid_bi_ptr character(len=MAX_STRING_LEN), target :: test_name + integer :: disable_diagnostics_int, theta_hydrostatic_mode_int, use_moisture_int + ! Initialize the C++ reference element structure (i.e., pseudo-spectral deriv matrix and ref element mass matrix) dvv = deriv1%dvv elem_mp = mp @@ -100,22 +102,30 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp) ! Fill the simulation params structures in C++ test_name = TRIM(test_case) // C_NULL_CHAR + + if (disable_diagnostics) disable_diagnostics_int=1 + if (.not.disable_diagnostics) disable_diagnostics_int=0 + if (use_moisture) use_moisture_int=1 + if (.not.use_moisture) use_moisture_int=0 + if(theta_hydrostatic_mode) theta_hydrostatic_mode_int=1 + if(.not.theta_hydrostatic_mode) theta_hydrostatic_mode_int=0 + call init_simulation_params_c (vert_remap_q_alg, limiter_option, rsplit, qsplit, tstep_type, & qsize, statefreq, nu, nu_p, nu_q, nu_s, nu_div, nu_top, & hypervis_order, hypervis_subcycle, hypervis_subcycle_tom, & hypervis_scaling, & dcmip16_mu, ftype, theta_advect_form, & - LOGICAL(prescribed_wind==1,c_bool), & - LOGICAL(moisture/="dry",c_bool), & - LOGICAL(disable_diagnostics,c_bool), & - LOGICAL(use_cpstar==1,c_bool), & + prescribed_wind, & + use_moisture_int, & + disable_diagnostics_int, & + use_cpstar, & transport_alg, & - LOGICAL(theta_hydrostatic_mode,c_bool), & + theta_hydrostatic_mode_int, & c_loc(test_name), & dt_remap_factor, dt_tracer_factor, & scale_factor, laplacian_rigid_factor, & nsplit, & - LOGICAL(pgrad_correction==1,c_bool), & + pgrad_correction, & dp3d_thresh, vtheta_thresh, internal_diagnostics_level) ! Initialize time level structure in C++ @@ -343,21 +353,21 @@ subroutine prim_init_elements_views (elem) end subroutine prim_init_elements_views subroutine prim_init_kokkos_functors (allocate_buffer) - use iso_c_binding, only : c_bool + use iso_c_binding, only : c_int use theta_f2c_mod, only : init_functors_c, init_boundary_exchanges_c - ! ! Optional Input ! - logical(kind=c_bool), optional :: allocate_buffer ! Whether functor memory buffer should be allocated internally - + integer, intent(in), optional :: allocate_buffer ! Whether functor memory buffer should be allocated internally + integer(kind=c_int) :: dummy ! Initialize the C++ functors in the C++ context ! If no argument allocate_buffer is present, ! let Homme internally allocate buffers if (present(allocate_buffer)) then - call init_functors_c (logical(allocate_buffer,c_bool)) + call init_functors_c (allocate_buffer) else - call init_functors_c (logical(.true.,c_bool)) + dummy=1; + call init_functors_c (dummy) endif ! Initialize boundary exchange structure in C++ diff --git a/components/homme/src/theta-l_kokkos/theta_f2c_mod.F90 b/components/homme/src/theta-l_kokkos/theta_f2c_mod.F90 index 7a4c0424807b..ba39bb03c22b 100644 --- a/components/homme/src/theta-l_kokkos/theta_f2c_mod.F90 +++ b/components/homme/src/theta-l_kokkos/theta_f2c_mod.F90 @@ -11,14 +11,14 @@ subroutine init_simulation_params_c (remap_alg, limiter_option, rsplit, qsplit, qsize, state_frequency, nu, nu_p, nu_q, nu_s, nu_div, nu_top, & hypervis_order, hypervis_subcycle, hypervis_subcycle_tom, & hypervis_scaling, & - dcmip16_mu, ftype, theta_adv_form, prescribed_wind, moisture, & + dcmip16_mu, ftype, theta_adv_form, prescribed_wind, use_moisture, & disable_diagnostics, use_cpstar, transport_alg, & theta_hydrostatic_mode, test_case_name, dt_remap_factor, & dt_tracer_factor, scale_factor, laplacian_rigid_factor, & nsplit, pgrad_correction, dp3d_thresh, vtheta_thresh, & internal_diagnostics_level) bind(c) - use iso_c_binding, only: c_int, c_bool, c_double, c_ptr + use iso_c_binding, only: c_int, c_double, c_ptr ! ! Inputs ! @@ -29,8 +29,8 @@ subroutine init_simulation_params_c (remap_alg, limiter_option, rsplit, qsplit, scale_factor, laplacian_rigid_factor, dp3d_thresh, vtheta_thresh integer(kind=c_int), intent(in) :: hypervis_order, hypervis_subcycle, hypervis_subcycle_tom integer(kind=c_int), intent(in) :: ftype, theta_adv_form - logical(kind=c_bool), intent(in) :: prescribed_wind, moisture, disable_diagnostics, use_cpstar - logical(kind=c_bool), intent(in) :: theta_hydrostatic_mode, pgrad_correction + integer(kind=c_int), intent(in) :: prescribed_wind, use_moisture, disable_diagnostics, use_cpstar + integer(kind=c_int), intent(in) :: theta_hydrostatic_mode, pgrad_correction type(c_ptr), intent(in) :: test_case_name end subroutine init_simulation_params_c @@ -138,11 +138,11 @@ end subroutine init_reference_element_c ! Create C++ functors subroutine init_functors_c (allocate_buffer) bind(c) - use iso_c_binding, only: c_bool + use iso_c_binding, only: c_int ! ! Inputs ! - logical(kind=c_bool), intent(in) :: allocate_buffer + integer(kind=c_int), intent(in) :: allocate_buffer end subroutine init_functors_c ! Initialize C++ boundary exchange structures From 0200065081910233ce6c861471b062d3e0608a2a Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 5 Sep 2024 16:33:51 -0600 Subject: [PATCH 05/12] HOMME: some SYCL related mods in kokkos initialization and team policy defaults --- .../homme/src/share/cxx/ExecSpaceDefs.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/components/homme/src/share/cxx/ExecSpaceDefs.cpp b/components/homme/src/share/cxx/ExecSpaceDefs.cpp index 8d496bff5d16..c9ca8a0ecd93 100644 --- a/components/homme/src/share/cxx/ExecSpaceDefs.cpp +++ b/components/homme/src/share/cxx/ExecSpaceDefs.cpp @@ -21,6 +21,10 @@ #include #endif +#ifdef KOKKOS_ENABLE_SYCL +#include +#endif + namespace Homme { // Since we're initializing from inside a Fortran code and don't have access to @@ -52,7 +56,16 @@ void initialize_kokkos () { // It isn't a big deal if we can't get the device count. nd = 1; } +#elif defined(KOKKOS_ENABLE_SYCL) + +//https://developer.codeplay.com/products/computecpp/ce/2.11.0/guides/sycl-for-cuda-developers/migrating-from-cuda-to-sycl + +//to make it build + int nd = 1; + #endif + + #ifdef HOMMEXX_ENABLE_GPU std::stringstream ss; ss << "--kokkos-num-devices=" << nd; @@ -117,6 +130,7 @@ team_num_threads_vectors_for_gpu ( assert(num_warps_total >= max_num_warps); assert(tp.max_threads_usable >= 1 && tp.max_vectors_usable >= 1); +#ifndef KOKKOS_ENABLE_SYCL int num_warps; if (tp.prefer_larger_team) { const int num_warps_usable = @@ -161,6 +175,9 @@ team_num_threads_vectors_for_gpu ( return std::make_pair( num_device_threads / num_vectors, num_vectors ); } +#else + return std::make_pair(4,16); +#endif } } // namespace Parallel From 5a43946dcf66085a54e64af76c8c5fade9a0e575 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 5 Sep 2024 16:34:39 -0600 Subject: [PATCH 06/12] HOMME: do not add dependency on cprnc if building without PIO --- components/homme/test_execs/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/components/homme/test_execs/CMakeLists.txt b/components/homme/test_execs/CMakeLists.txt index a3113921b029..a007a5532b61 100644 --- a/components/homme/test_execs/CMakeLists.txt +++ b/components/homme/test_execs/CMakeLists.txt @@ -142,8 +142,11 @@ ADD_CUSTOM_TARGET(test-execs) ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} "--output-on-failure") +if(NOT BUILD_HOMME_WITHOUT_PIOLIBRARY) # Force cprnc to be built when make check is run ADD_DEPENDENCIES(check cprnc) +endif() + # Create a target for making the reference data ADD_CUSTOM_TARGET(baseline From 132b3a506b8a90274bcb9cc5f7c67cf2eafdfb20 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Thu, 5 Sep 2024 16:30:50 -0600 Subject: [PATCH 07/12] HOMME: minor GPTL timing related mods * Disable timing in prim_main for the first two teps * Add some timers in EulerStepFunctor --- components/homme/src/prim_main.F90 | 7 ++++++- components/homme/src/share/cxx/EulerStepFunctorImpl.hpp | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/components/homme/src/prim_main.F90 b/components/homme/src/prim_main.F90 index bfbe57e8b317..d6901151d365 100644 --- a/components/homme/src/prim_main.F90 +++ b/components/homme/src/prim_main.F90 @@ -20,7 +20,7 @@ program prim_main use element_mod, only: element_t use common_io_mod, only: output_dir, infilenames use common_movie_mod, only: nextoutputstep - use perf_mod, only: t_initf, t_prf, t_finalizef, t_startf, t_stopf ! _EXTERNAL + use perf_mod, only: t_initf, t_prf, t_finalizef, t_startf, t_stopf, t_disablef, t_enablef ! _EXTERNAL use restart_io_mod , only: restartheader_t, writerestart use hybrid_mod, only: hybrid_create #if (defined MODEL_THETA_L && defined ARKODE) @@ -240,6 +240,11 @@ end subroutine finalize_kokkos_f90 nstep = nextoutputstep(tl) do while(tl%nstep= 2) call t_enablef() call t_startf('prim_run') call prim_run_subcycle(elem, hybrid,nets,nete, tstep, .false., tl, hvcoord,1) call t_stopf('prim_run') diff --git a/components/homme/src/share/cxx/EulerStepFunctorImpl.hpp b/components/homme/src/share/cxx/EulerStepFunctorImpl.hpp index f3029764dac3..f87bb108bebf 100644 --- a/components/homme/src/share/cxx/EulerStepFunctorImpl.hpp +++ b/components/homme/src/share/cxx/EulerStepFunctorImpl.hpp @@ -652,7 +652,10 @@ class EulerStepFunctorImpl { minmax_and_biharmonic(); } } + + GPTLstart("tl-at adv-n-limit"); advect_and_limit(); + GPTLstop("tl-at adv-n-limit"); exchange_qdp_dss_var(); } @@ -667,6 +670,7 @@ class EulerStepFunctorImpl { void run_tracer_phase (const KernelVariables& kv) const { compute_qtens(kv); kv.team_barrier(); + if (m_data.limiter_option == 8) { limiter_optim_iter_full(kv); kv.team_barrier(); @@ -674,6 +678,7 @@ class EulerStepFunctorImpl { limiter_clip_and_sum(kv); kv.team_barrier(); } + apply_spheremp(kv); } From 30b9f60a441723c13a6e111767b234f8eb482f73 Mon Sep 17 00:00:00 2001 From: "Andrew M. Bradley" Date: Mon, 9 Sep 2024 14:14:42 -0500 Subject: [PATCH 08/12] Hommexx: Isolate int-bool workaround to just the C++-F90 interface code. Also fix preqx's use of use_moisture. Remove MOIST-DRY enum to avoid confusion, since it's no longer used. --- .../homme/src/preqx_kokkos/cxx/CamForcing.cpp | 6 ++--- .../cxx/cxx_f90_interface_preqx.cpp | 4 +-- .../src/preqx_kokkos/cxx/prim_advance_exp.cpp | 2 +- components/homme/src/share/control_mod.F90 | 1 - components/homme/src/share/cxx/GllFvRemap.cpp | 6 ++--- components/homme/src/share/cxx/GllFvRemap.hpp | 2 +- .../homme/src/share/cxx/GllFvRemapImpl.cpp | 6 ++--- .../homme/src/share/cxx/GllFvRemapImpl.hpp | 5 ++-- .../homme/src/share/cxx/HommexxEnums.hpp | 5 ---- components/homme/src/share/gllfvremap_mod.F90 | 9 ++++--- components/homme/src/share/namelist_mod.F90 | 6 ----- .../theta-l_kokkos/cxx/EquationOfState.hpp | 4 +-- .../cxx/HyperviscosityFunctorImpl.cpp | 6 +---- .../cxx/HyperviscosityFunctorImpl.hpp | 2 +- .../src/theta-l_kokkos/prim_driver_mod.F90 | 25 +++++++++---------- .../thetal_kokkos_ut/forcing_ut.cpp | 8 +++--- .../thetal_kokkos_ut/gllfvremap_ut.cpp | 4 +-- 17 files changed, 42 insertions(+), 59 deletions(-) diff --git a/components/homme/src/preqx_kokkos/cxx/CamForcing.cpp b/components/homme/src/preqx_kokkos/cxx/CamForcing.cpp index 2b1e6514389e..36ca5f4a95f4 100644 --- a/components/homme/src/preqx_kokkos/cxx/CamForcing.cpp +++ b/components/homme/src/preqx_kokkos/cxx/CamForcing.cpp @@ -51,7 +51,7 @@ void state_forcing( void tracer_forcing( const ExecViewUnmanaged &f_q, const HybridVCoord &hvcoord, const TimeLevel &tl, const int &num_q, - const MoistDry &moisture, const double &dt, + const bool &use_moisture, const double &dt, const ExecViewManaged &ps_v, const ExecViewManaged< Scalar * [Q_NUM_TIME_LEVELS][QSIZE_D][NP][NP][NUM_LEV]> &qdp, @@ -61,7 +61,7 @@ void tracer_forcing( const int np1 = tl.n0; const int np1_qdp = tl.n0_qdp; - if (moisture == MoistDry::MOIST) { + if (use_moisture) { // Remove the m_fq_ps_v buffer since it's not actually needed. // Instead apply the forcing to m_ps_v directly // Bonus - one less parallel reduce in dry cases! @@ -161,7 +161,7 @@ void apply_cam_forcing(const Real &dt) { tracers.fq = decltype(tracers.fq)("fq", elems.num_elems(),tracers.num_tracers()); } tracer_forcing(tracers.fq, hvcoord, tl, tracers.num_tracers(), - sim_params.moisture, dt, elems.m_state.m_ps_v, tracers.qdp, tracers.Q); + sim_params.use_moisture, dt, elems.m_state.m_ps_v, tracers.qdp, tracers.Q); GPTLstop("ApplyCAMForcing"); } diff --git a/components/homme/src/preqx_kokkos/cxx/cxx_f90_interface_preqx.cpp b/components/homme/src/preqx_kokkos/cxx/cxx_f90_interface_preqx.cpp index c75143a9836a..b433a48c2abc 100644 --- a/components/homme/src/preqx_kokkos/cxx/cxx_f90_interface_preqx.cpp +++ b/components/homme/src/preqx_kokkos/cxx/cxx_f90_interface_preqx.cpp @@ -37,7 +37,7 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option, const int& time_step_type, const int& qsize, const int& state_frequency, const Real& nu, const Real& nu_p, const Real& nu_q, const Real& nu_s, const Real& nu_div, const Real& nu_top, const int& hypervis_order, const int& hypervis_subcycle, const double& hypervis_scaling, - const int& ftype, const bool& prescribed_wind, const bool& moisture, const bool& disable_diagnostics, + const int& ftype, const bool& prescribed_wind, const bool& use_moisture, const bool& disable_diagnostics, const bool& use_cpstar, const int& transport_alg, const int& dt_remap_factor, const int& dt_tracer_factor, const double& scale_factor, const double& laplacian_rigid_factor) @@ -90,7 +90,7 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option, params.hypervis_subcycle = hypervis_subcycle; params.hypervis_scaling = hypervis_scaling; params.disable_diagnostics = disable_diagnostics; - params.moisture = (moisture ? MoistDry::MOIST : MoistDry::DRY); + params.use_moisture = use_moisture; params.use_cpstar = use_cpstar; params.transport_alg = transport_alg; // SphereOperators parameters; preqx supports only the sphere. diff --git a/components/homme/src/preqx_kokkos/cxx/prim_advance_exp.cpp b/components/homme/src/preqx_kokkos/cxx/prim_advance_exp.cpp index f7c7600aab8d..58e58f0160bf 100644 --- a/components/homme/src/preqx_kokkos/cxx/prim_advance_exp.cpp +++ b/components/homme/src/preqx_kokkos/cxx/prim_advance_exp.cpp @@ -34,7 +34,7 @@ void prim_advance_exp (TimeLevel& tl, const Real dt, const bool compute_diagnost // Determine the tracers time level tl.n0_qdp= -1; - if (params.moisture == MoistDry::MOIST) { + if (params.use_moisture) { tl.update_tracers_levels(params.qsplit); } diff --git a/components/homme/src/share/control_mod.F90 b/components/homme/src/share/control_mod.F90 index 9c3c599b2324..0e9494f5a6cd 100644 --- a/components/homme/src/share/control_mod.F90 +++ b/components/homme/src/share/control_mod.F90 @@ -43,7 +43,6 @@ module control_mod ! flag used by preqx, theta-l and theta-c models ! should be renamed to "hydrostatic_mode" logical, public :: theta_hydrostatic_mode - integer, public :: theta_hydrostatic_mode_integer integer, public :: tstep_type= 5 ! preqx timestepping options diff --git a/components/homme/src/share/cxx/GllFvRemap.cpp b/components/homme/src/share/cxx/GllFvRemap.cpp index a8f564958d46..7b0400427f38 100644 --- a/components/homme/src/share/cxx/GllFvRemap.cpp +++ b/components/homme/src/share/cxx/GllFvRemap.cpp @@ -21,8 +21,8 @@ void init_gllfvremap_c (int nelemd, int np, int nf, int nf_max, CF90Ptr f2g_remapd, CF90Ptr D_f, CF90Ptr Dinv_f) { auto& c = Context::singleton(); auto& g = c.get(); - g.init_data(nf, nf_max, theta_hydrostatic_mode, fv_metdet, g2f_remapd, - f2g_remapd, D_f, Dinv_f); + const bool thm = static_cast(theta_hydrostatic_mode); + g.init_data(nf, nf_max, thm, fv_metdet, g2f_remapd, f2g_remapd, D_f, Dinv_f); } GllFvRemap::GllFvRemap () { @@ -52,7 +52,7 @@ void GllFvRemap::init_boundary_exchanges () { } void GllFvRemap -::init_data (const int nf, const int nf_max, const int theta_hydrostatic_mode, +::init_data (const int nf, const int nf_max, const bool theta_hydrostatic_mode, const Real* fv_metdet, const Real* g2f_remapd, const Real* f2g_remapd, const Real* D_f, const Real* Dinv_f) { m_impl->init_data(nf, nf_max, theta_hydrostatic_mode, fv_metdet, diff --git a/components/homme/src/share/cxx/GllFvRemap.hpp b/components/homme/src/share/cxx/GllFvRemap.hpp index 2adff0aeaa96..7ebf5a82b71a 100644 --- a/components/homme/src/share/cxx/GllFvRemap.hpp +++ b/components/homme/src/share/cxx/GllFvRemap.hpp @@ -40,7 +40,7 @@ class GllFvRemap { typedef Phys2T::const_type CPhys2T; typedef Phys3T::const_type CPhys3T; - void init_data(const int nf, const int nf_max, const int theta_hydrostatic_mode, + void init_data(const int nf, const int nf_max, const bool theta_hydrostatic_mode, const Real* fv_metdet, const Real* g2f_remapd, const Real* f2g_remapd, const Real* D_f, const Real* Dinv_f); diff --git a/components/homme/src/share/cxx/GllFvRemapImpl.cpp b/components/homme/src/share/cxx/GllFvRemapImpl.cpp index d4ab5c89f510..ea1a52f5efdf 100644 --- a/components/homme/src/share/cxx/GllFvRemapImpl.cpp +++ b/components/homme/src/share/cxx/GllFvRemapImpl.cpp @@ -131,7 +131,7 @@ void GllFvRemapImpl::init_boundary_exchanges () { template using FV = Kokkos::View; void GllFvRemapImpl -::init_data (const int nf, const int nf_max, const int theta_hydrostatic_mode, +::init_data (const int nf, const int nf_max, const bool theta_hydrostatic_mode, const Real* fv_metdet_r, const Real* g2f_remapd_r, const Real* f2g_remapd_r, const Real* D_f_r, const Real* Dinv_f_r) { using Kokkos::create_mirror_view; @@ -395,7 +395,7 @@ ::run_dyn_to_fv_phys (const int timeidx, const Phys1T& ps, const Phys1T& phis, c const auto hvcoord = m_hvcoord; const bool use_moisture = m_data.use_moisture; - const int theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; + const bool theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; const bool want_dp_fv_out = dp_fv_out_ptr != nullptr; VPhys2T dp_fv_out; @@ -605,7 +605,7 @@ run_fv_phys_to_dyn (const int timeidx, const CPhys2T& Ts, const CPhys3T& uvs, const auto fT = m_forcing.m_ft; const auto hvcoord = m_hvcoord; const auto dp3d = m_state.m_dp3d; - const int theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; + const bool theta_hydrostatic_mode = m_data.theta_hydrostatic_mode; EquationOfState eos; eos.init(theta_hydrostatic_mode, hvcoord); ElementOps ops; ops.init(hvcoord); const auto tu_ne = m_tu_ne; diff --git a/components/homme/src/share/cxx/GllFvRemapImpl.hpp b/components/homme/src/share/cxx/GllFvRemapImpl.hpp index 7388fddb1231..11738b2bf455 100644 --- a/components/homme/src/share/cxx/GllFvRemapImpl.hpp +++ b/components/homme/src/share/cxx/GllFvRemapImpl.hpp @@ -60,8 +60,7 @@ struct GllFvRemapImpl { struct Data { int nelemd, qsize, nf2, n_dss_fld; - bool use_moisture; - int theta_hydrostatic_mode; + bool use_moisture, theta_hydrostatic_mode; static constexpr int nbuf1 = 2, nbuf2 = 1; Buf1 buf1[nbuf1]; @@ -108,7 +107,7 @@ struct GllFvRemapImpl { void init_buffers(const FunctorsBuffersManager& fbm); void init_boundary_exchanges(); - void init_data(const int nf, const int nf_max, const int theta_hydrostatic_mode, + void init_data(const int nf, const int nf_max, const bool theta_hydrostatic_mode, const Real* fv_metdet_r, const Real* g2f_remapd_r, const Real* f2g_remapd_r, const Real* D_f_r, const Real* Dinv_f_r); diff --git a/components/homme/src/share/cxx/HommexxEnums.hpp b/components/homme/src/share/cxx/HommexxEnums.hpp index 59c8f3c9652c..06abbf35adbc 100644 --- a/components/homme/src/share/cxx/HommexxEnums.hpp +++ b/components/homme/src/share/cxx/HommexxEnums.hpp @@ -47,11 +47,6 @@ enum class ForcingAlg : int { FORCING_2 = 2, // TODO: Rename FORCING_1 and FORCING_2 to something more descriptive }; -enum class MoistDry { - MOIST, - DRY -}; - enum class AdvectionForm { Conservative, NonConservative diff --git a/components/homme/src/share/gllfvremap_mod.F90 b/components/homme/src/share/gllfvremap_mod.F90 index e927f04aba06..a5f9b3033c96 100644 --- a/components/homme/src/share/gllfvremap_mod.F90 +++ b/components/homme/src/share/gllfvremap_mod.F90 @@ -265,14 +265,14 @@ end subroutine gfr_init subroutine gfr_init_hxx() bind(c) #if KOKKOS_TARGET - use control_mod, only: theta_hydrostatic_mode_integer + use control_mod, only: theta_hydrostatic_mode use iso_c_binding, only: c_int interface - subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode_integer, & + subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode, & fv_metdet, g2f_remapd, f2g_remapd, D_f, Dinv_f) bind(c) use iso_c_binding, only: c_int, c_double integer (c_int), value, intent(in) :: nelemd, np, nf, nf_max - integer (c_int), value, intent(in) :: theta_hydrostatic_mode_integer + integer (c_int), value, intent(in) :: theta_hydrostatic_mode real (c_double), dimension(nf*nf,nelemd), intent(in) :: fv_metdet real (c_double), dimension(np,np,nf_max*nf_max), intent(in) :: g2f_remapd real (c_double), dimension(nf_max*nf_max,np,np), intent(in) :: f2g_remapd @@ -280,7 +280,8 @@ subroutine init_gllfvremap_c(nelemd, np, nf, nf_max, theta_hydrostatic_mode_inte end subroutine init_gllfvremap_c end interface integer (c_int) :: thm - thm = theta_hydrostatic_mode_integer + thm = 0 + if (theta_hydrostatic_mode) thm = 1 call init_gllfvremap_c(nelemd, np, gfr%nphys, nphys_max, thm, & gfr%fv_metdet, gfr%g2f_remapd, gfr%f2g_remapd, gfr%D_f, gfr%Dinv_f) #endif diff --git a/components/homme/src/share/namelist_mod.F90 b/components/homme/src/share/namelist_mod.F90 index a3edaa07e235..1d47090182ba 100644 --- a/components/homme/src/share/namelist_mod.F90 +++ b/components/homme/src/share/namelist_mod.F90 @@ -41,7 +41,6 @@ module namelist_mod runtype, & integration, & ! integration method theta_hydrostatic_mode, & - theta_hydrostatic_mode_integer, & transport_alg , & ! SE Eulerian, classical SL, cell-integrated SL semi_lagrange_cdr_alg, & ! see control_mod for semi_lagrange_* descriptions semi_lagrange_cdr_check, & @@ -453,10 +452,8 @@ subroutine readnl(par) planar_slice = .false. theta_hydrostatic_mode = .true. ! for preqx, this must be .true. - theta_hydrostatic_mode_integer = 1 ! for preqx, this must be .true. #if ( defined MODEL_THETA_C || defined MODEL_THETA_L ) theta_hydrostatic_mode = .false. ! default NH - theta_hydrostatic_mode_integer = 0 ! default NH #endif @@ -853,10 +850,7 @@ subroutine readnl(par) call MPI_bcast(case_planar_bubble,1,MPIlogical_t,par%root,par%comm,ierr) #endif -if(theta_hydrostatic_mode) theta_hydrostatic_mode_integer = 1 -if(.not. theta_hydrostatic_mode) theta_hydrostatic_mode_integer = 0 call MPI_bcast(theta_hydrostatic_mode ,1,MPIlogical_t,par%root,par%comm,ierr) - call MPI_bcast(theta_hydrostatic_mode_integer ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(transport_alg ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(semi_lagrange_cdr_alg ,1,MPIinteger_t,par%root,par%comm,ierr) call MPI_bcast(semi_lagrange_cdr_check ,1,MPIlogical_t,par%root,par%comm,ierr) diff --git a/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp b/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp index a50a28d58f55..dd97720f1be2 100644 --- a/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/EquationOfState.hpp @@ -23,7 +23,7 @@ class EquationOfState { EquationOfState () = default; - void init (const int theta_hydrostatic_mode, + void init (const bool theta_hydrostatic_mode, const HybridVCoord& hvcoord) { m_theta_hydrostatic_mode = theta_hydrostatic_mode; m_hvcoord = hvcoord; @@ -250,7 +250,7 @@ class EquationOfState { public: - int m_theta_hydrostatic_mode; + bool m_theta_hydrostatic_mode; HybridVCoord m_hvcoord; }; diff --git a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp index 55792051d33b..d160e114475b 100644 --- a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp +++ b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.cpp @@ -120,11 +120,7 @@ void HyperviscosityFunctorImpl::init_params(const SimulationParams& params) #ifdef HOMMEXX_BFB_TESTING m_process_nh_vars = 1; #else - if (params.theta_hydrostatic_mode){ - m_process_nh_vars = 0; - }else{ - m_process_nh_vars = 1; - } + m_process_nh_vars = not params.theta_hydrostatic_mode; #endif } diff --git a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp index 993d525422f5..a55ecbb365f9 100644 --- a/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp +++ b/components/homme/src/theta-l_kokkos/cxx/HyperviscosityFunctorImpl.hpp @@ -397,7 +397,7 @@ class HyperviscosityFunctorImpl Buffers m_buffers; HybridVCoord m_hvcoord; - int m_process_nh_vars; + bool m_process_nh_vars; // Policies Kokkos::TeamPolicy m_policy_update_states; diff --git a/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 b/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 index 262ba19f4b7a..eae8544ca865 100644 --- a/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 +++ b/components/homme/src/theta-l_kokkos/prim_driver_mod.F90 @@ -103,12 +103,12 @@ subroutine prim_create_c_data_structures (tl, hvcoord, mp) ! Fill the simulation params structures in C++ test_name = TRIM(test_case) // C_NULL_CHAR - if (disable_diagnostics) disable_diagnostics_int=1 - if (.not.disable_diagnostics) disable_diagnostics_int=0 - if (use_moisture) use_moisture_int=1 - if (.not.use_moisture) use_moisture_int=0 - if(theta_hydrostatic_mode) theta_hydrostatic_mode_int=1 - if(.not.theta_hydrostatic_mode) theta_hydrostatic_mode_int=0 + disable_diagnostics_int = 0 + if (disable_diagnostics) disable_diagnostics_int = 1 + use_moisture_int = 0 + if (use_moisture) use_moisture_int = 1 + theta_hydrostatic_mode_int = 0 + if (theta_hydrostatic_mode) theta_hydrostatic_mode_int = 1 call init_simulation_params_c (vert_remap_q_alg, limiter_option, rsplit, qsplit, tstep_type, & qsize, statefreq, nu, nu_p, nu_q, nu_s, nu_div, nu_top, & @@ -358,17 +358,16 @@ subroutine prim_init_kokkos_functors (allocate_buffer) ! ! Optional Input ! - integer, intent(in), optional :: allocate_buffer ! Whether functor memory buffer should be allocated internally - integer(kind=c_int) :: dummy + logical, intent(in), optional :: allocate_buffer ! Whether functor memory buffer should be allocated internally + integer(kind=c_int) :: ab ! Initialize the C++ functors in the C++ context ! If no argument allocate_buffer is present, ! let Homme internally allocate buffers + ab = 1 if (present(allocate_buffer)) then - call init_functors_c (allocate_buffer) - else - dummy=1; - call init_functors_c (dummy) - endif + if (.not. allocate_buffer) ab = 0 + end if + call init_functors_c (ab) ! Initialize boundary exchange structure in C++ call init_boundary_exchanges_c () diff --git a/components/homme/test_execs/thetal_kokkos_ut/forcing_ut.cpp b/components/homme/test_execs/thetal_kokkos_ut/forcing_ut.cpp index 5e4c51c7ca11..fb301166f429 100644 --- a/components/homme/test_execs/thetal_kokkos_ut/forcing_ut.cpp +++ b/components/homme/test_execs/thetal_kokkos_ut/forcing_ut.cpp @@ -160,8 +160,8 @@ TEST_CASE("forcing", "forcing") { std::cout << "Testing tracers forcing.\n"; for (const bool hydrostatic : {true,false}) { std::cout << " -> hydrostatic mode: " << (hydrostatic ? "true" : "false") << "\n"; - for (const MoistDry moisture : {MoistDry::DRY,MoistDry::MOIST}) { - std::cout << " -> moisture: " << (moisture==MoistDry::MOIST ? "moist" : "dry") << "\n"; + for (const bool use_moisture: {false,true}) { + std::cout << " -> moisture: " << (use_moisture ? "moist" : "dry") << "\n"; for (const bool adjustment : {true,false}) { std::cout << " -> adjustment: " << (adjustment ? "true" : "false") << "\n"; @@ -200,8 +200,8 @@ TEST_CASE("forcing", "forcing") { ff.init_buffers(fbm); // Run tracers forcing (cxx and f90) - ff.tracers_forcing(dt,np1,np1_qdp,adjustment,moisture); - tracers_forcing_f90(dt,np1+1,np1_qdp+1,hydrostatic,moisture==MoistDry::MOIST,adjustment); + ff.tracers_forcing(dt,np1,np1_qdp,adjustment,use_moisture); + tracers_forcing_f90(dt,np1+1,np1_qdp+1,hydrostatic,use_moisture,adjustment); // Compare answers Kokkos::deep_copy(h_dp,state.m_dp3d); diff --git a/components/homme/test_execs/thetal_kokkos_ut/gllfvremap_ut.cpp b/components/homme/test_execs/thetal_kokkos_ut/gllfvremap_ut.cpp index 0f14b0c3e55a..cf9db941ea1c 100644 --- a/components/homme/test_execs/thetal_kokkos_ut/gllfvremap_ut.cpp +++ b/components/homme/test_execs/thetal_kokkos_ut/gllfvremap_ut.cpp @@ -183,7 +183,7 @@ struct Session { p.qsize = qsize; p.hypervis_scaling = 0; p.transport_alg = 0; - p.moisture = MoistDry::MOIST; + p.use_moisture = true; p.theta_hydrostatic_mode = false; p.scale_factor = is_sphere ? PhysicalConstants::rearth0 : 1; p.laplacian_rigid_factor = is_sphere ? 1/p.scale_factor : 0; @@ -725,7 +725,7 @@ static void test_get_temperature (Session& s) { const auto& sp = c.get(); EquationOfState eos; eos.init(theta_hydrostatic_mode, s.h); ElementOps ops; ops.init(s.h); - const bool use_moisture = sp.moisture == MoistDry::MOIST; + const bool use_moisture = sp.use_moisture; const auto state = c.get(); const auto tracers = c.get(); const auto dp3d = state.m_dp3d; From f30afb2f1db294c3ddda8e9dcb7bd9ffdba1a3fb Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 10 Sep 2024 19:04:55 +0000 Subject: [PATCH 09/12] switch linker back to F --- components/homme/cmake/HommeMacros.cmake | 8 +------- components/homme/cmake/machineFiles/chrysalis-bfb.cmake | 2 -- components/homme/cmake/machineFiles/chrysalis.cmake | 2 -- 3 files changed, 1 insertion(+), 11 deletions(-) diff --git a/components/homme/cmake/HommeMacros.cmake b/components/homme/cmake/HommeMacros.cmake index 5610947cb299..6d073dbbe83b 100644 --- a/components/homme/cmake/HommeMacros.cmake +++ b/components/homme/cmake/HommeMacros.cmake @@ -112,13 +112,7 @@ macro(createTestExec execName execType macroNP macroNC ADD_DEFINITIONS(-DHAVE_CONFIG_H) ADD_EXECUTABLE(${execName} ${EXEC_SOURCES}) - - if(SUNSPOT_MACHINE) - SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE CXX) - else() - SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE Fortran) - endif() - + SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE Fortran) IF(BUILD_HOMME_WITHOUT_PIOLIBRARY) TARGET_COMPILE_DEFINITIONS(${execName} PUBLIC HOMME_WITHOUT_PIOLIBRARY) ENDIF() diff --git a/components/homme/cmake/machineFiles/chrysalis-bfb.cmake b/components/homme/cmake/machineFiles/chrysalis-bfb.cmake index fa1f1ac545c5..b9f0d41a0606 100644 --- a/components/homme/cmake/machineFiles/chrysalis-bfb.cmake +++ b/components/homme/cmake/machineFiles/chrysalis-bfb.cmake @@ -17,8 +17,6 @@ ENDIF() SET (USE_MPIEXEC "srun" CACHE STRING "") SET (USE_MPI_OPTIONS "-K --cpu_bind=cores" CACHE STRING "") -SET (CHRYSALIS_MACHINE TRUE CACHE BOOL "") - # Set kokkos arch, to get correct avx flags SET (Kokkos_ARCH_ZEN2 ON CACHE BOOL "") diff --git a/components/homme/cmake/machineFiles/chrysalis.cmake b/components/homme/cmake/machineFiles/chrysalis.cmake index 97bc682c9546..68ff76ec8082 100644 --- a/components/homme/cmake/machineFiles/chrysalis.cmake +++ b/components/homme/cmake/machineFiles/chrysalis.cmake @@ -17,8 +17,6 @@ ENDIF() SET (USE_MPIEXEC "srun" CACHE STRING "") SET (USE_MPI_OPTIONS "-K --cpu_bind=cores" CACHE STRING "") -SET (CHRYSALIS_MACHINE TRUE CACHE BOOL "") - # Set kokkos arch, to get correct avx flags SET (Kokkos_ARCH_ZEN2 ON CACHE BOOL "") From 30ffb16650cfeeb54098495f630fd9698586d16c Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 17 Sep 2024 01:02:49 +0000 Subject: [PATCH 10/12] switch to 16,8 pair --- components/homme/src/share/cxx/ExecSpaceDefs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/homme/src/share/cxx/ExecSpaceDefs.cpp b/components/homme/src/share/cxx/ExecSpaceDefs.cpp index c9ca8a0ecd93..4f3d97135fea 100644 --- a/components/homme/src/share/cxx/ExecSpaceDefs.cpp +++ b/components/homme/src/share/cxx/ExecSpaceDefs.cpp @@ -176,7 +176,7 @@ team_num_threads_vectors_for_gpu ( num_vectors ); } #else - return std::make_pair(4,16); + return std::make_pair(16,8); #endif } From 112fe79e60eeaa4d7ca0d85a54f24640b6a974f6 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 17 Sep 2024 01:03:44 +0000 Subject: [PATCH 11/12] adjust aurora flags --- components/homme/cmake/machineFiles/aurora-aot.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/homme/cmake/machineFiles/aurora-aot.cmake b/components/homme/cmake/machineFiles/aurora-aot.cmake index b6fe34a78d72..094ec8882784 100644 --- a/components/homme/cmake/machineFiles/aurora-aot.cmake +++ b/components/homme/cmake/machineFiles/aurora-aot.cmake @@ -43,9 +43,9 @@ SET(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "") #AOT flags SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda") -SET(SYCL_LINK_FLAGS "-fsycl -fsycl-device-code-split=per_kernel -fsycl-link-huge-device-code -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"") +SET(SYCL_LINK_FLAGS "-fsycl-max-parallel-link-jobs=32 -fsycl-link-huge-device-code -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"") -SET(ADD_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "") +SET(ADD_Fortran_FLAGS "-fc=ifx -fpscomp logicals -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "") SET(ADD_C_FLAGS "-O3 -DNDEBUG " CACHE STRING "") SET(ADD_CXX_FLAGS "-std=c++17 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "") From 0fcac3cc2906dee8a391f2d446bcf5286db38919 Mon Sep 17 00:00:00 2001 From: Oksana Guba Date: Tue, 17 Sep 2024 01:04:25 +0000 Subject: [PATCH 12/12] adopt ekat sycl changes, TeamVectorRange --- .../share/cxx/utilities/scream_tridiag.hpp | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/components/homme/src/share/cxx/utilities/scream_tridiag.hpp b/components/homme/src/share/cxx/utilities/scream_tridiag.hpp index e18bbc4e7e27..26221db39552 100644 --- a/components/homme/src/share/cxx/utilities/scream_tridiag.hpp +++ b/components/homme/src/share/cxx/utilities/scream_tridiag.hpp @@ -128,6 +128,10 @@ int get_thread_id_within_team_gpu (const TeamMember& team) { // Can't use team.team_rank() here because vector direction also uses physical // threads but TeamMember types don't expose that information. return blockDim.x * threadIdx.y + threadIdx.x; +#elif defined(__SYCL_DEVICE_ONLY__) + auto item = team.item(); + return static_cast(item.get_local_range(1) * item.get_local_id(0) + + item.get_local_id(1)); #else assert(0); return -1; @@ -138,6 +142,9 @@ template KOKKOS_FORCEINLINE_FUNCTION int get_team_nthr_gpu (const TeamMember& team) { #if defined __CUDA_ARCH__ || defined __HIP_DEVICE_COMPILE__ return blockDim.x * blockDim.y; +#elif defined __SYCL_DEVICE_ONLY__ + auto item = team.item(); + return static_cast(item.get_local_range(0) * item.get_local_range(1)); #else assert(0); return -1; @@ -161,6 +168,16 @@ KOKKOS_FORCEINLINE_FUNCTION int get_team_nthr (const Kokkos::Impl::HIPTeamMember& team) { return get_team_nthr_gpu(team); } #endif // KOKKOS_ENABLE_HIP + +#ifdef KOKKOS_ENABLE_SYCL +KOKKOS_FORCEINLINE_FUNCTION +int get_thread_id_within_team (const Kokkos::Impl::SYCLTeamMember& team) +{ return get_thread_id_within_team_gpu(team); } +KOKKOS_FORCEINLINE_FUNCTION +int get_team_nthr (const Kokkos::Impl::SYCLTeamMember& team) +{ return get_team_nthr_gpu(team); } +#endif // KOKKOS_ENABLE_SYCL + template KOKKOS_INLINE_FUNCTION const T& min (const T& a, const T& b) { return a < b ? a : b; } @@ -634,7 +651,7 @@ void bfb (const TeamMember& team, const auto f = [&] (const int& j) { impl::bfb_thomas_solve(dl, d, du, Kokkos::subview(X , Kokkos::ALL(), j)); }; - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nrhs), f); + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nrhs), f); } template @@ -664,7 +681,7 @@ void bfb (const TeamMember& team, subview(du, ALL(), j), subview(X , ALL(), j)); }; - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nrhs), f); + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nrhs), f); } } // namespace tridiag