Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

homme SYCL changes #6594

Merged
merged 12 commits into from
Sep 21, 2024
Merged
8 changes: 5 additions & 3 deletions components/homme/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,9 @@ IF (HOMME_USE_KOKKOS)

STRING (TOUPPER ${HOMMEXX_EXEC_SPACE} HOMMEXX_EXEC_SPACE_UPPER)

IF (HOMMEXX_EXEC_SPACE_UPPER STREQUAL "HIP")
IF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "SYCL")
SET (HOMMEXX_SYCL_SPACE ON)
ELSEIF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "HIP")
SET (HOMMEXX_HIP_SPACE ON)
ELSEIF (HOMMEXX_EXEC_SPACE_UPPER STREQUAL "CUDA")
SET (HOMMEXX_CUDA_SPACE ON)
Expand Down Expand Up @@ -303,7 +305,7 @@ SET (HOMMEXX_ENABLE_GPU_F90 FALSE)

IF (HOMME_USE_KOKKOS)

IF (CUDA_BUILD OR HIP_BUILD)
IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD)
SET (DEFAULT_VECTOR_SIZE 1)
SET (HOMMEXX_ENABLE_GPU TRUE)
SET (HOMMEXX_ENABLE_GPU_F90 TRUE)
Expand All @@ -312,7 +314,7 @@ IF (HOMME_USE_KOKKOS)
ENDIF()

SET (HOMMEXX_VECTOR_SIZE ${DEFAULT_VECTOR_SIZE} CACHE STRING
"If AVX or Cuda or HIP don't take priority, use this software vector size.")
"If AVX or Cuda or HIP or SYCL don't take priority, use this software vector size.")

IF (CMAKE_BUILD_TYPE_UPPER MATCHES "DEBUG" OR CMAKE_BUILD_TYPE_UPPER MATCHES "RELWITHDEBINFO")
SET (HOMMEXX_DEBUG ON)
Expand Down
8 changes: 7 additions & 1 deletion components/homme/cmake/HommeMacros.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,13 @@ macro(createTestExec execName execType macroNP macroNC
ADD_DEFINITIONS(-DHAVE_CONFIG_H)

ADD_EXECUTABLE(${execName} ${EXEC_SOURCES})
SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE Fortran)

if(SUNSPOT_MACHINE)
SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE CXX)
else()
SET_TARGET_PROPERTIES(${execName} PROPERTIES LINKER_LANGUAGE Fortran)
endif()

IF(BUILD_HOMME_WITHOUT_PIOLIBRARY)
TARGET_COMPILE_DEFINITIONS(${execName} PUBLIC HOMME_WITHOUT_PIOLIBRARY)
ENDIF()
Expand Down
64 changes: 64 additions & 0 deletions components/homme/cmake/machineFiles/aurora-aot.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# HOMME machine file: SYCL build with ahead-of-time (AOT) device compilation
# (spir64_gen target, see SYCL_LINK_FLAGS below).
#
# Machine files are presumably pre-loaded as an initial cache (`cmake -C <file>`).
# In that mode only set(... CACHE ...) entries reach the project; plain set()
# calls are local helpers that exist only while this script runs.
#
# Module setup recorded by the original author:
#   module restore
#   module load oneapi/eng-compiler/2022.12.30.005
#   module load intel_compute_runtime/release/agama-devel-627
#   module load spack cmake
#   module list

# Machine flag. When it is set, createTestExec (HommeMacros.cmake) links test
# executables with LINKER_LANGUAGE CXX instead of Fortran.
set(SUNSPOT_MACHINE TRUE CACHE BOOL "")

set(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")
set(HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "")

set(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")

# WITH_PNETCDF is an on/off switch, so it is cached as BOOL (not FILEPATH).
set(WITH_PNETCDF FALSE CACHE BOOL "")

set(USE_QUEUING FALSE CACHE BOOL "")

# temp hack
set(HOMME_USE_KOKKOS TRUE CACHE BOOL "")

set(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "")
set(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")

# Optional: point the build at an external Kokkos installation.
#set(KOKKOS_HOME "/home/onguba/kokkos-build/mar05-aot/install" CACHE STRING "")
#set(E3SM_KOKKOS_PATH ${KOKKOS_HOME} CACHE STRING "")

set(USE_TRILINOS OFF CACHE BOOL "")

# SYCL_BUILD is one of the switches that enables the GPU settings in
# components/homme/CMakeLists.txt (IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD)).
set(SYCL_BUILD TRUE CACHE BOOL "")
set(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "")

# Must be a cache entry: a plain set() in an initial-cache script is dropped
# before the project is configured, so the requested standard would be lost.
set(CMAKE_CXX_STANDARD 17 CACHE STRING "CXX Standard")

set(CMAKE_C_COMPILER "mpicc" CACHE STRING "")
set(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "")
set(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "")

# -fsycl-link-huge-device-code is needed for the theta model to build.
#
# JIT flags (alternative, kept for reference):
#SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda")
#SET(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64")

# AOT flags. These two variables are local helpers; they are expanded into the
# cached ADD_CXX_FLAGS / ADD_LINKER_FLAGS entries below.
set(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda")
set(SYCL_LINK_FLAGS "-fsycl -fsycl-device-code-split=per_kernel -fsycl-link-huge-device-code -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"")

set(ADD_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "")
set(ADD_C_FLAGS "-O3 -DNDEBUG " CACHE STRING "")

set(ADD_CXX_FLAGS "-std=c++17 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "")
set(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "")

# All OpenMP threading is disabled for this configuration.
set(ENABLE_OPENMP OFF CACHE BOOL "")
set(ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
set(ENABLE_HORIZ_OPENMP OFF CACHE BOOL "")

set(HOMME_TESTING_PROFILE "dev" CACHE STRING "")

set(USE_NUM_PROCS 4 CACHE STRING "")

# A string of launcher options, not a file path, so it is cached as STRING.
set(USE_MPI_OPTIONS "--bind-to core" CACHE STRING "")


58 changes: 58 additions & 0 deletions components/homme/cmake/machineFiles/aurora-jit.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# HOMME machine file: SYCL build with just-in-time (JIT) device compilation
# (spir64 target, see SYCL_LINK_FLAGS below).
#
# Machine files are presumably pre-loaded as an initial cache (`cmake -C <file>`).
# In that mode only set(... CACHE ...) entries reach the project; plain set()
# calls are local helpers that exist only while this script runs.
#
# Module setup recorded by the original author:
#   module restore
#   module load oneapi/eng-compiler/2022.12.30.005
#   module load intel_compute_runtime/release/agama-devel-627
#   module load spack cmake
#   module list

# NOTE(review): SUNSPOT_MACHINE is not set in this file, so createTestExec
# (HommeMacros.cmake) keeps LINKER_LANGUAGE Fortran for test executables.
# Confirm that is intended for the JIT build given the -fsycl link flags below.

set(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")
set(HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "")

set(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")

# WITH_PNETCDF is an on/off switch, so it is cached as BOOL (not FILEPATH).
set(WITH_PNETCDF FALSE CACHE BOOL "")

set(USE_QUEUING FALSE CACHE BOOL "")

# temp hack
set(HOMME_USE_KOKKOS TRUE CACHE BOOL "")

set(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "")
set(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")

# Optional: point the build at an external Kokkos installation.
#set(KOKKOS_HOME "/home/onguba/kokkos-build/jan03-2024/install" CACHE STRING "")
#set(E3SM_KOKKOS_PATH ${KOKKOS_HOME} CACHE STRING "")

set(USE_TRILINOS OFF CACHE BOOL "")

# SYCL_BUILD is one of the switches that enables the GPU settings in
# components/homme/CMakeLists.txt (IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD)).
set(SYCL_BUILD TRUE CACHE BOOL "")
set(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "")

# Must be a cache entry: a plain set() in an initial-cache script is dropped
# before the project is configured, so the requested standard would be lost.
set(CMAKE_CXX_STANDARD 17 CACHE STRING "CXX Standard")

set(CMAKE_C_COMPILER "mpicc" CACHE STRING "")
set(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "")
set(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "")

# -fsycl-link-huge-device-code is needed for the theta model to build.
# These two variables are local helpers; they are expanded into the cached
# ADD_CXX_FLAGS / ADD_LINKER_FLAGS entries below.
set(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda")
set(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64")

set(ADD_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "")
set(ADD_C_FLAGS "-O3 -DNDEBUG " CACHE STRING "")

set(ADD_CXX_FLAGS "-std=c++17 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "")
set(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "")

# All OpenMP threading is disabled for this configuration.
set(ENABLE_OPENMP OFF CACHE BOOL "")
set(ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
set(ENABLE_HORIZ_OPENMP OFF CACHE BOOL "")

set(HOMME_TESTING_PROFILE "dev" CACHE STRING "")

set(USE_NUM_PROCS 4 CACHE STRING "")

# A string of launcher options, not a file path, so it is cached as STRING.
set(USE_MPI_OPTIONS "--bind-to core" CACHE STRING "")


2 changes: 2 additions & 0 deletions components/homme/cmake/machineFiles/chrysalis-bfb.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ ENDIF()
# MPI launcher command and the options passed to it.
set(USE_MPIEXEC "srun" CACHE STRING "")
set(USE_MPI_OPTIONS "-K --cpu_bind=cores" CACHE STRING "")

# Marks this configuration as the Chrysalis machine (consumer not visible in this chunk).
set(CHRYSALIS_MACHINE TRUE CACHE BOOL "")

# Select the Kokkos ZEN2 architecture so that the correct AVX flags are used.
set(Kokkos_ARCH_ZEN2 ON CACHE BOOL "")

Expand Down
2 changes: 2 additions & 0 deletions components/homme/cmake/machineFiles/chrysalis.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ ENDIF()
# MPI launcher command and the options passed to it.
set(USE_MPIEXEC "srun" CACHE STRING "")
set(USE_MPI_OPTIONS "-K --cpu_bind=cores" CACHE STRING "")

# Marks this configuration as the Chrysalis machine (consumer not visible in this chunk).
set(CHRYSALIS_MACHINE TRUE CACHE BOOL "")

# Select the Kokkos ZEN2 architecture so that the correct AVX flags are used.
set(Kokkos_ARCH_ZEN2 ON CACHE BOOL "")

Expand Down
74 changes: 74 additions & 0 deletions components/homme/cmake/machineFiles/polaris-a100.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# HOMME machine file (CMake initial-cache script, despite the .sh extension):
# CUDA build with Kokkos targeting the AMPERE80 architecture.
#
# Machine files are presumably pre-loaded as an initial cache (`cmake -C <file>`).
#
#Currently Loaded Modules:
# 1) craype-x86-rome 6) craype/2.7.15 11) cray-libpals/1.1.7 16) nvhpc-mixed/21.9
# 2) libfabric/1.11.0.4.125 7) cray-dsmml/0.2.2 12) PrgEnv-gnu/8.3.3 17) cudatoolkit-standalone/11.6.2
# 3) craype-network-ofi 8) cray-pmi/6.1.2 13) gnu-parallel/2021-09-22 18) cmake/3.23.2
# 4) perftools-base/22.05.0 9) cray-pmi-lib/6.0.17 14) gcc/11.2.0
# 5) craype-accel-nvidia80 10) cray-pals/1.1.7 15) cray-mpich/8.1.16

# Disabled Hommexx execution-space settings, kept for reference.
#SET(HOMMEXX_EXEC_SPACE CUDA CACHE STRING "")
#SET(HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "")
#SET(HOMMEXX_CUDA_MAX_WARP_PER_TEAM "16" CACHE STRING "")

# Disabled I/O library locations, kept for reference.
# cray-hdf5-parallel/1.12.0.6 cray-netcdf-hdf5parallel/4.7.4.6 cray-parallel-netcdf/1.12.1.6
#SET(NETCDF_DIR $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} CACHE FILEPATH "")
#SET(PNETCDF_DIR $ENV{CRAY_PARALLEL_NETCDF_DIR} CACHE FILEPATH "")
#SET(HDF5_DIR $ENV{CRAY_HDF5_PARALLEL_PREFIX} CACHE FILEPATH "")

#for scorpio
#SET (NetCDF_C_PATH $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} CACHE FILEPATH "")
#SET (NetCDF_Fortran_PATH $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} CACHE FILEPATH "")

set(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")

set(HOMME_FIND_BLASLAPACK FALSE CACHE BOOL "")

# WITH_PNETCDF is an on/off switch, so it is cached as BOOL (not FILEPATH).
set(WITH_PNETCDF FALSE CACHE BOOL "")

set(USE_QUEUING FALSE CACHE BOOL "")

set(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")

# CUDA_BUILD is one of the switches that enables the GPU settings in
# components/homme/CMakeLists.txt (IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD)).
set(CUDA_BUILD TRUE CACHE BOOL "")

#SET(HOMMEXX_BFB_TESTING TRUE CACHE BOOL "")

set(USE_TRILINOS OFF CACHE BOOL "")

# Kokkos configuration: CUDA backend with lambdas, OpenMP backend off.
set(Kokkos_ENABLE_OPENMP OFF CACHE BOOL "")
set(Kokkos_ENABLE_CUDA ON CACHE BOOL "")
set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "")
set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "")
#SET(Kokkos_ARCH_ZEN2 ON CACHE BOOL "") # works, and perf same if both AMPERE80 and ZEN2 are on
#SET(Kokkos_ENABLE_CUDA_UVM ON CACHE BOOL "")
set(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "")
#SET(Kokkos_ENABLE_CUDA_ARCH_LINKING OFF CACHE BOOL "")

# Compilers in use: cc / ftn / CC.
set(CMAKE_C_COMPILER "cc" CACHE STRING "")
set(CMAKE_Fortran_COMPILER "ftn" CACHE STRING "")
set(CMAKE_CXX_COMPILER "CC" CACHE STRING "")

# Alternative compiler choices, kept for reference (MPI wrappers, optionally
# with Kokkos' nvcc_wrapper as the C++ compiler):
#SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "")
#SET(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "")
#SET(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "")
#SET(CMAKE_CXX_COMPILER "${CMAKE_CURRENT_SOURCE_DIR}/../../externals/kokkos/bin/nvcc_wrapper" CACHE STRING "")

# Note: need to set MPICH_CXX env variable and perhaps NVCC_WRAPPER_DEFAULT_COMPILER

set(CXXLIB_SUPPORTED_CACHE FALSE CACHE BOOL "")

# All OpenMP threading is disabled for this configuration.
set(ENABLE_OPENMP OFF CACHE BOOL "")
set(ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
set(ENABLE_HORIZ_OPENMP OFF CACHE BOOL "")

set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "")

#SET(HOMME_TESTING_PROFILE "dev" CACHE STRING "")

set(USE_NUM_PROCS 4 CACHE STRING "")

# NOTE(review): the module list above includes cray-pals; confirm that srun is
# the right launcher on this system.
set(USE_MPIEXEC "srun" CACHE STRING "")
#SET(CPRNC_DIR /global/cfs/cdirs/e3sm/tools/cprnc CACHE FILEPATH "")
63 changes: 63 additions & 0 deletions components/homme/cmake/machineFiles/spot-aot-AB2.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# HOMME machine file: SYCL build with ahead-of-time (AOT) device compilation
# (spir64_gen target, see SYCL_LINK_FLAGS below), MPI on device enabled.
#
# Machine files are presumably pre-loaded as an initial cache (`cmake -C <file>`).
# In that mode only set(... CACHE ...) entries reach the project; plain set()
# calls are local helpers that exist only while this script runs.
#
# Module setup recorded by the original author:
#   module restore
#   module load oneapi/eng-compiler/2022.12.30.005
#   module load intel_compute_runtime/release/agama-devel-627
#   module load spack cmake
#   module list

# Machine flag. When it is set, createTestExec (HommeMacros.cmake) links test
# executables with LINKER_LANGUAGE CXX instead of Fortran.
set(SUNSPOT_MACHINE TRUE CACHE BOOL "")

set(HOMMEXX_MPI_ON_DEVICE TRUE CACHE BOOL "")

#SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")

set(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")

# WITH_PNETCDF is an on/off switch, so it is cached as BOOL (not FILEPATH).
set(WITH_PNETCDF FALSE CACHE BOOL "")

set(USE_QUEUING FALSE CACHE BOOL "")

# temp hack
set(HOMME_USE_KOKKOS TRUE CACHE BOOL "")

set(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "")
set(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")

# Optional: point the build at an external Kokkos installation.
#set(KOKKOS_HOME "/home/onguba/kokkos-build/june22-2024-aot/install" CACHE STRING "")
#set(E3SM_KOKKOS_PATH ${KOKKOS_HOME} CACHE STRING "")

# NetCDF installation used on this machine (C and Fortran share one prefix).
set(NetCDF_Fortran_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "")
set(NetCDF_C_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "")

set(USE_TRILINOS OFF CACHE BOOL "")

# SYCL_BUILD is one of the switches that enables the GPU settings in
# components/homme/CMakeLists.txt (IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD)).
set(SYCL_BUILD TRUE CACHE BOOL "")
set(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "")

# Cached on purpose: a plain set(CMAKE_CXX_STANDARD 17) in an initial-cache
# script is dropped before the project is configured.
set(CMAKE_CXX_STANDARD 17 CACHE STRING "CXX Standard")

set(CMAKE_C_COMPILER "mpicc" CACHE STRING "")
set(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "")
set(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "")

# Local helpers; they are expanded into the cached ADD_CXX_FLAGS /
# ADD_LINKER_FLAGS entries below.
set(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda")
set(SYCL_LINK_FLAGS "-fsycl-max-parallel-link-jobs=32 -fsycl-link-huge-device-code -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"")

# "-fpscomp logicals" does not actually solve the issue with bools here;
# -fp-model=precise was also suggested, but it does not work either.
set(ADD_Fortran_FLAGS " -fc=ifx -fpscomp logicals -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "")
set(ADD_C_FLAGS "-O3 -DNDEBUG " CACHE STRING "")

set(ADD_CXX_FLAGS " -std=c++17 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "")
set(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "")

# All OpenMP threading is disabled for this configuration.
set(ENABLE_OPENMP OFF CACHE BOOL "")
set(ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
set(ENABLE_HORIZ_OPENMP OFF CACHE BOOL "")

set(HOMME_TESTING_PROFILE "dev" CACHE STRING "")

set(USE_NUM_PROCS 4 CACHE STRING "")

# A string of launcher options, not a file path, so it is cached as STRING.
set(USE_MPI_OPTIONS "--bind-to core" CACHE STRING "")


7 changes: 6 additions & 1 deletion components/homme/src/prim_main.F90
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ program prim_main
use element_mod, only: element_t
use common_io_mod, only: output_dir, infilenames
use common_movie_mod, only: nextoutputstep
use perf_mod, only: t_initf, t_prf, t_finalizef, t_startf, t_stopf ! _EXTERNAL
use perf_mod, only: t_initf, t_prf, t_finalizef, t_startf, t_stopf, t_disablef, t_enablef ! _EXTERNAL
use restart_io_mod , only: restartheader_t, writerestart
use hybrid_mod, only: hybrid_create
#if (defined MODEL_THETA_L && defined ARKODE)
Expand Down Expand Up @@ -240,6 +240,11 @@ end subroutine finalize_kokkos_f90

nstep = nextoutputstep(tl)
do while(tl%nstep<nstep)

if(tl%nstep < 2) then
call t_disablef()
endif
if(tl%nstep >= 2) call t_enablef()
call t_startf('prim_run')
call prim_run_subcycle(elem, hybrid,nets,nete, tstep, .false., tl, hvcoord,1)
call t_stopf('prim_run')
Expand Down
1 change: 1 addition & 0 deletions components/homme/src/share/control_mod.F90
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ module control_mod
! flag used by preqx, theta-l and theta-c models
! should be renamed to "hydrostatic_mode"
logical, public :: theta_hydrostatic_mode
integer, public :: theta_hydrostatic_mode_integer


integer, public :: tstep_type= 5 ! preqx timestepping options
Expand Down
2 changes: 1 addition & 1 deletion components/homme/src/share/cxx/Config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# endif
#endif

#if ! defined HOMMEXX_CUDA_SPACE && ! defined HOMMEXX_OPENMP_SPACE && ! defined HOMMEXX_THREADS_SPACE && ! defined HOMMEXX_SERIAL_SPACE && ! defined HOMMEXX_HIP_SPACE
#if ! defined HOMMEXX_CUDA_SPACE && ! defined HOMMEXX_OPENMP_SPACE && ! defined HOMMEXX_THREADS_SPACE && ! defined HOMMEXX_SERIAL_SPACE && ! defined HOMMEXX_HIP_SPACE && ! defined HOMMEXX_SYCL_SPACE
# define HOMMEXX_DEFAULT_SPACE
#endif

Expand Down
5 changes: 5 additions & 0 deletions components/homme/src/share/cxx/EulerStepFunctorImpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,10 @@ class EulerStepFunctorImpl {
minmax_and_biharmonic();
}
}

GPTLstart("tl-at adv-n-limit");
advect_and_limit();
GPTLstop("tl-at adv-n-limit");
exchange_qdp_dss_var();
}

Expand All @@ -667,13 +670,15 @@ class EulerStepFunctorImpl {
// Runs one tracer phase for the team described by `kv`: computes qtens,
// optionally applies a limiter chosen by m_data.limiter_option, then calls
// apply_spheremp. A team barrier follows every stage except the last.
void run_tracer_phase (const KernelVariables& kv) const {
compute_qtens(kv);
kv.team_barrier();

// Limiter selection: option 8 and option 9 each call a different limiter
// routine; any other value skips limiting (and the extra barrier) entirely.
if (m_data.limiter_option == 8) {
limiter_optim_iter_full(kv);
kv.team_barrier();
} else if (m_data.limiter_option == 9) {
limiter_clip_and_sum(kv);
kv.team_barrier();
}

apply_spheremp(kv);
}

Expand Down
Loading