Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

homme SYCL changes #6594

Merged
merged 12 commits into from
Sep 21, 2024
Merged
8 changes: 5 additions & 3 deletions components/homme/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,9 @@ IF (HOMME_USE_KOKKOS)

STRING (TOUPPER ${HOMMEXX_EXEC_SPACE} HOMMEXX_EXEC_SPACE_UPPER)

IF (HOMMEXX_EXEC_SPACE_UPPER STREQUAL "HIP")
IF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "SYCL")
SET (HOMMEXX_SYCL_SPACE ON)
ELSEIF (${HOMMEXX_EXEC_SPACE_UPPER} STREQUAL "HIP")
SET (HOMMEXX_HIP_SPACE ON)
ELSEIF (HOMMEXX_EXEC_SPACE_UPPER STREQUAL "CUDA")
SET (HOMMEXX_CUDA_SPACE ON)
Expand Down Expand Up @@ -303,7 +305,7 @@ SET (HOMMEXX_ENABLE_GPU_F90 FALSE)

IF (HOMME_USE_KOKKOS)

IF (CUDA_BUILD OR HIP_BUILD)
IF (CUDA_BUILD OR HIP_BUILD OR SYCL_BUILD)
SET (DEFAULT_VECTOR_SIZE 1)
SET (HOMMEXX_ENABLE_GPU TRUE)
SET (HOMMEXX_ENABLE_GPU_F90 TRUE)
Expand All @@ -312,7 +314,7 @@ IF (HOMME_USE_KOKKOS)
ENDIF()

SET (HOMMEXX_VECTOR_SIZE ${DEFAULT_VECTOR_SIZE} CACHE STRING
"If AVX or Cuda or HIP don't take priority, use this software vector size.")
"If AVX or Cuda or HIP or SYCL don't take priority, use this software vector size.")

IF (CMAKE_BUILD_TYPE_UPPER MATCHES "DEBUG" OR CMAKE_BUILD_TYPE_UPPER MATCHES "RELWITHDEBINFO")
SET (HOMMEXX_DEBUG ON)
Expand Down
64 changes: 64 additions & 0 deletions components/homme/cmake/machineFiles/aurora-aot.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Cache presets for a SYCL build of HOMME / HOMMEXX that uses ahead-of-time
# (AOT) device compilation (-fsycl-targets=spir64_gen).
#
# NOTE(review): presumably pre-loaded with `cmake -C <this file>`. In that mode
# only CACHE entries reach the project, so every setting that the project must
# see is written as a cache entry; plain variables are used only as local
# helpers for composing flag strings inside this file.
#
# Module commands recorded by the original author:
#module restore
#module load oneapi/eng-compiler/2022.12.30.005
#module load intel_compute_runtime/release/agama-devel-627
#module load spack cmake
#module list


# NOTE(review): the flag is named after Sunspot although this file targets
# Aurora, and aurora-jit.cmake does not set it -- confirm this is intended.
set(SUNSPOT_MACHINE TRUE CACHE BOOL "")

set(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")
set(HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "")

set(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")

# Typed BOOL (was FILEPATH): with a FILEPATH type, an untyped
# -DWITH_PNETCDF=... given before this file is loaded would be rewritten
# by CMake into an absolute path.
set(WITH_PNETCDF FALSE CACHE BOOL "")

set(USE_QUEUING FALSE CACHE BOOL "")

#temp hack
set(HOMME_USE_KOKKOS TRUE CACHE BOOL "")

set(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "")
set(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")

# Optional: point the build at an external Kokkos installation.
#set(KOKKOS_HOME "/home/onguba/kokkos-build/mar05-aot/install" CACHE STRING "")
#set(E3SM_KOKKOS_PATH ${KOKKOS_HOME} CACHE STRING "")

set(USE_TRILINOS OFF CACHE BOOL "")

set(SYCL_BUILD TRUE CACHE BOOL "")
set(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "")

# Cached so the value survives pre-loading; a plain set() here would be
# discarded. Same form as used in spot-aot-AB2.cmake.
set(CMAKE_CXX_STANDARD 17 CACHE STRING "CXX Standard")

set(CMAKE_C_COMPILER "mpicc" CACHE STRING "")
set(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "")
set(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "")

# Per the original author's note, -fsycl-link-huge-device-code is needed for
# the theta targets to build.
# JIT flags (alternative, disabled):
#SET(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda")
#SET(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64")

# AOT flags (active). These two are local helper variables, expanded into the
# cached ADD_CXX_FLAGS / ADD_LINKER_FLAGS below.
# NOTE(review): "-device 12.60.7" selects the AOT target device -- confirm it
# matches the GPUs of the target machine.
set(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda")
set(SYCL_LINK_FLAGS "-fsycl-max-parallel-link-jobs=32 -fsycl-link-huge-device-code -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"")

set(ADD_Fortran_FLAGS "-fc=ifx -fpscomp logicals -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "")
set(ADD_C_FLAGS "-O3 -DNDEBUG " CACHE STRING "")

set(ADD_CXX_FLAGS "-std=c++17 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "")
set(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "")

# All OpenMP threading variants are switched off for this build.
set(ENABLE_OPENMP OFF CACHE BOOL "")
set(ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
set(ENABLE_HORIZ_OPENMP OFF CACHE BOOL "")

set(HOMME_TESTING_PROFILE "dev" CACHE STRING "")

set(USE_NUM_PROCS 4 CACHE STRING "")

# Typed STRING (was FILEPATH): this is a list of launcher options, not a path.
set(USE_MPI_OPTIONS "--bind-to core" CACHE STRING "")


58 changes: 58 additions & 0 deletions components/homme/cmake/machineFiles/aurora-jit.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Cache presets for a SYCL build of HOMME / HOMMEXX that uses just-in-time
# (JIT) device compilation (-fsycl-targets=spir64).
#
# NOTE(review): presumably pre-loaded with `cmake -C <this file>`. In that mode
# only CACHE entries reach the project, so every setting that the project must
# see is written as a cache entry; plain variables are used only as local
# helpers for composing flag strings inside this file.
#
# Module commands recorded by the original author:
#module restore
#module load oneapi/eng-compiler/2022.12.30.005
#module load intel_compute_runtime/release/agama-devel-627
#module load spack cmake
#module list



set(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")
set(HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "")

set(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")

# Typed BOOL (was FILEPATH): with a FILEPATH type, an untyped
# -DWITH_PNETCDF=... given before this file is loaded would be rewritten
# by CMake into an absolute path.
set(WITH_PNETCDF FALSE CACHE BOOL "")

set(USE_QUEUING FALSE CACHE BOOL "")

#temp hack
set(HOMME_USE_KOKKOS TRUE CACHE BOOL "")

set(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "")
set(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")

# Optional: point the build at an external Kokkos installation.
#set(KOKKOS_HOME "/home/onguba/kokkos-build/jan03-2024/install" CACHE STRING "")
#set(E3SM_KOKKOS_PATH ${KOKKOS_HOME} CACHE STRING "")

set(USE_TRILINOS OFF CACHE BOOL "")

set(SYCL_BUILD TRUE CACHE BOOL "")
set(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "")

# Cached so the value survives pre-loading; a plain set() here would be
# discarded. Same form as used in spot-aot-AB2.cmake.
set(CMAKE_CXX_STANDARD 17 CACHE STRING "CXX Standard")

set(CMAKE_C_COMPILER "mpicc" CACHE STRING "")
set(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "")
set(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "")

# Per the original author's note, -fsycl-link-huge-device-code is needed for
# the theta targets to build.
# These two are local helper variables, expanded into the cached
# ADD_CXX_FLAGS / ADD_LINKER_FLAGS below.
set(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda")
set(SYCL_LINK_FLAGS "-fsycl -fsycl-link-huge-device-code -fsycl-device-code-split=per_kernel -fsycl-targets=spir64")

# NOTE(review): unlike aurora-aot.cmake, the Fortran flags here do not include
# "-fpscomp logicals" -- confirm the difference is intentional.
set(ADD_Fortran_FLAGS "-fc=ifx -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "")
set(ADD_C_FLAGS "-O3 -DNDEBUG " CACHE STRING "")

set(ADD_CXX_FLAGS "-std=c++17 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "")
set(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "")

# All OpenMP threading variants are switched off for this build.
set(ENABLE_OPENMP OFF CACHE BOOL "")
set(ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
set(ENABLE_HORIZ_OPENMP OFF CACHE BOOL "")

set(HOMME_TESTING_PROFILE "dev" CACHE STRING "")

set(USE_NUM_PROCS 4 CACHE STRING "")

# Typed STRING (was FILEPATH): this is a list of launcher options, not a path.
set(USE_MPI_OPTIONS "--bind-to core" CACHE STRING "")


74 changes: 74 additions & 0 deletions components/homme/cmake/machineFiles/polaris-a100.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Cache presets for a CUDA (Kokkos_ARCH_AMPERE80) build of the HOMME theta
# Kokkos target.
# NOTE(review): the content is CMake although the file name ends in ".sh";
# presumably pre-loaded with `cmake -C <this file>` like the other machine
# files -- confirm, and consider renaming to ".cmake".
#
#Currently Loaded Modules:
# 1) craype-x86-rome 6) craype/2.7.15 11) cray-libpals/1.1.7 16) nvhpc-mixed/21.9
# 2) libfabric/1.11.0.4.125 7) cray-dsmml/0.2.2 12) PrgEnv-gnu/8.3.3 17) cudatoolkit-standalone/11.6.2
# 3) craype-network-ofi 8) cray-pmi/6.1.2 13) gnu-parallel/2021-09-22 18) cmake/3.23.2
# 4) perftools-base/22.05.0 9) cray-pmi-lib/6.0.17 14) gcc/11.2.0
# 5) craype-accel-nvidia80 10) cray-pals/1.1.7 15) cray-mpich/8.1.16



#SET(HOMMEXX_EXEC_SPACE CUDA CACHE STRING "")
#SET(HOMMEXX_MPI_ON_DEVICE FALSE CACHE BOOL "")
#SET(HOMMEXX_CUDA_MAX_WARP_PER_TEAM "16" CACHE STRING "")

# cray-hdf5-parallel/1.12.0.6 cray-netcdf-hdf5parallel/4.7.4.6 cray-parallel-netcdf/1.12.1.6
#SET(NETCDF_DIR $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} CACHE FILEPATH "")
#SET(PNETCDF_DIR $ENV{CRAY_PARALLEL_NETCDF_DIR} CACHE FILEPATH "")
#SET(HDF5_DIR $ENV{CRAY_HDF5_PARALLEL_PREFIX} CACHE FILEPATH "")

#for scorpio
#SET (NetCDF_C_PATH $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} CACHE FILEPATH "")
#SET (NetCDF_Fortran_PATH $ENV{CRAY_NETCDF_HDF5PARALLEL_PREFIX} CACHE FILEPATH "")

set(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")

set(HOMME_FIND_BLASLAPACK FALSE CACHE BOOL "")

# Typed BOOL (was FILEPATH): with a FILEPATH type, an untyped
# -DWITH_PNETCDF=... given before this file is loaded would be rewritten
# by CMake into an absolute path.
set(WITH_PNETCDF FALSE CACHE BOOL "")

set(USE_QUEUING FALSE CACHE BOOL "")

set(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")

set(CUDA_BUILD TRUE CACHE BOOL "")

#SET(HOMMEXX_BFB_TESTING TRUE CACHE BOOL "")

set(USE_TRILINOS OFF CACHE BOOL "")

# Kokkos backend selection: CUDA with lambda support on, OpenMP off.
set(Kokkos_ENABLE_OPENMP OFF CACHE BOOL "")
set(Kokkos_ENABLE_CUDA ON CACHE BOOL "")
set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "")
set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "")
#SET(Kokkos_ARCH_ZEN2 ON CACHE BOOL "") # works, and perf same if both AMPERE80 and ZEN2 are on
#SET(Kokkos_ENABLE_CUDA_UVM ON CACHE BOOL "")
set(Kokkos_ENABLE_EXPLICIT_INSTANTIATION OFF CACHE BOOL "")
#SET(Kokkos_ENABLE_CUDA_ARCH_LINKING OFF CACHE BOOL "")

# Active choice: the cc / ftn / CC compiler wrappers. The commented blocks
# around it are alternatives kept by the original author.
#SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "")
#SET(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "")
#SET(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "")
set(CMAKE_C_COMPILER "cc" CACHE STRING "")
set(CMAKE_Fortran_COMPILER "ftn" CACHE STRING "")
set(CMAKE_CXX_COMPILER "CC" CACHE STRING "")

#SET(CMAKE_C_COMPILER "mpicc" CACHE STRING "")
#SET(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "")
#SET(CMAKE_CXX_COMPILER "${CMAKE_CURRENT_SOURCE_DIR}/../../externals/kokkos/bin/nvcc_wrapper" CACHE STRING "")

# Note: need to set MPICH_CXX env variable and perhaps NVCC_WRAPPER_DEFAULT_COMPILER

set(CXXLIB_SUPPORTED_CACHE FALSE CACHE BOOL "")

# All OpenMP threading variants are switched off for this build.
set(ENABLE_OPENMP OFF CACHE BOOL "")
set(ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
set(ENABLE_HORIZ_OPENMP OFF CACHE BOOL "")

set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "")

#SET(HOMME_TESTING_PROFILE "dev" CACHE STRING "")

set(USE_NUM_PROCS 4 CACHE STRING "")

# NOTE(review): "srun" and the commented CPRNC path below look carried over
# from a different machine -- confirm the launcher is correct for this system.
set(USE_MPIEXEC "srun" CACHE STRING "")
#SET(CPRNC_DIR /global/cfs/cdirs/e3sm/tools/cprnc CACHE FILEPATH "")
63 changes: 63 additions & 0 deletions components/homme/cmake/machineFiles/spot-aot-AB2.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Cache presets for a SYCL build of HOMME / HOMMEXX that uses ahead-of-time
# (AOT) device compilation (-fsycl-targets=spir64_gen), with MPI on device
# enabled and NetCDF taken from a fixed installation path.
#
# NOTE(review): presumably pre-loaded with `cmake -C <this file>`. In that mode
# only CACHE entries reach the project; plain variables are used only as local
# helpers for composing flag strings inside this file.
#
# Module commands recorded by the original author:
#module restore
#module load oneapi/eng-compiler/2022.12.30.005
#module load intel_compute_runtime/release/agama-devel-627
#module load spack cmake
#module list

set(SUNSPOT_MACHINE TRUE CACHE BOOL "")

set(HOMMEXX_MPI_ON_DEVICE TRUE CACHE BOOL "")

#SET(BUILD_HOMME_WITHOUT_PIOLIBRARY TRUE CACHE BOOL "")

set(HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")

# Typed BOOL (was FILEPATH): with a FILEPATH type, an untyped
# -DWITH_PNETCDF=... given before this file is loaded would be rewritten
# by CMake into an absolute path.
set(WITH_PNETCDF FALSE CACHE BOOL "")

set(USE_QUEUING FALSE CACHE BOOL "")

#temp hack
set(HOMME_USE_KOKKOS TRUE CACHE BOOL "")

set(BUILD_HOMME_PREQX_KOKKOS TRUE CACHE BOOL "")
set(BUILD_HOMME_THETA_KOKKOS TRUE CACHE BOOL "")

# Optional: point the build at an external Kokkos installation.
#set(KOKKOS_HOME "/home/onguba/kokkos-build/june22-2024-aot/install" CACHE STRING "")
#set(E3SM_KOKKOS_PATH ${KOKKOS_HOME} CACHE STRING "")

# Machine-specific NetCDF installation (same prefix for the C and Fortran
# libraries).
set(NetCDF_Fortran_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "")
set(NetCDF_C_PATH "/lus/gila/projects/CSC249ADSE15_CNDA/software/oneAPI.2022.12.30.003/netcdf" CACHE STRING "")

set(USE_TRILINOS OFF CACHE BOOL "")

set(SYCL_BUILD TRUE CACHE BOOL "")
set(HOMME_ENABLE_COMPOSE FALSE CACHE BOOL "")

# The cached form is the active one; the plain form is kept commented out
# as left by the original author.
#SET(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 17 CACHE STRING "CXX Standard")

set(CMAKE_C_COMPILER "mpicc" CACHE STRING "")
set(CMAKE_Fortran_COMPILER "mpifort" CACHE STRING "")
set(CMAKE_CXX_COMPILER "mpicxx" CACHE STRING "")

# Local helper variables, expanded into the cached ADD_CXX_FLAGS /
# ADD_LINKER_FLAGS below.
# NOTE(review): "-device 12.60.7" selects the AOT target device -- confirm it
# matches the GPUs of the target machine.
set(SYCL_COMPILE_FLAGS "-std=c++17 -fsycl -fsycl-device-code-split=per_kernel -fno-sycl-id-queries-fit-in-int -fsycl-unnamed-lambda")
set(SYCL_LINK_FLAGS "-fsycl-max-parallel-link-jobs=32 -fsycl-link-huge-device-code -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xsycl-target-backend \"-device 12.60.7\"")

# Author's note: -fpscomp does not actually solve the issue with bools here;
# another suggestion was -fp-model=precise, which did not work either.
set(ADD_Fortran_FLAGS " -fc=ifx -fpscomp logicals -O3 -DNDEBUG -DCPRINTEL -g" CACHE STRING "")
set(ADD_C_FLAGS "-O3 -DNDEBUG " CACHE STRING "")

set(ADD_CXX_FLAGS " -std=c++17 -O3 -DNDEBUG ${SYCL_COMPILE_FLAGS}" CACHE STRING "")
set(ADD_LINKER_FLAGS "-O3 -DNDEBUG ${SYCL_LINK_FLAGS} -fortlib" CACHE STRING "")

# All OpenMP threading variants are switched off for this build.
set(ENABLE_OPENMP OFF CACHE BOOL "")
set(ENABLE_COLUMN_OPENMP OFF CACHE BOOL "")
set(ENABLE_HORIZ_OPENMP OFF CACHE BOOL "")

set(HOMME_TESTING_PROFILE "dev" CACHE STRING "")

set(USE_NUM_PROCS 4 CACHE STRING "")

# Typed STRING (was FILEPATH): this is a list of launcher options, not a path.
set(USE_MPI_OPTIONS "--bind-to core" CACHE STRING "")


6 changes: 3 additions & 3 deletions components/homme/src/preqx_kokkos/cxx/CamForcing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void state_forcing(
void tracer_forcing(
const ExecViewUnmanaged<const Scalar ** [NP][NP][NUM_LEV]> &f_q,
const HybridVCoord &hvcoord, const TimeLevel &tl, const int &num_q,
const MoistDry &moisture, const double &dt,
const bool &use_moisture, const double &dt,
const ExecViewManaged<Real * [NUM_TIME_LEVELS][NP][NP]> &ps_v,
const ExecViewManaged<
Scalar * [Q_NUM_TIME_LEVELS][QSIZE_D][NP][NP][NUM_LEV]> &qdp,
Expand All @@ -61,7 +61,7 @@ void tracer_forcing(
const int np1 = tl.n0;
const int np1_qdp = tl.n0_qdp;

if (moisture == MoistDry::MOIST) {
if (use_moisture) {
// Remove the m_fq_ps_v buffer since it's not actually needed.
// Instead apply the forcing to m_ps_v directly
// Bonus - one less parallel reduce in dry cases!
Expand Down Expand Up @@ -161,7 +161,7 @@ void apply_cam_forcing(const Real &dt) {
tracers.fq = decltype(tracers.fq)("fq", elems.num_elems(),tracers.num_tracers());
}
tracer_forcing(tracers.fq, hvcoord, tl, tracers.num_tracers(),
sim_params.moisture, dt, elems.m_state.m_ps_v, tracers.qdp, tracers.Q);
sim_params.use_moisture, dt, elems.m_state.m_ps_v, tracers.qdp, tracers.Q);
GPTLstop("ApplyCAMForcing");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option,
const int& time_step_type, const int& qsize, const int& state_frequency,
const Real& nu, const Real& nu_p, const Real& nu_q, const Real& nu_s, const Real& nu_div, const Real& nu_top,
const int& hypervis_order, const int& hypervis_subcycle, const double& hypervis_scaling,
const int& ftype, const bool& prescribed_wind, const bool& moisture, const bool& disable_diagnostics,
const int& ftype, const bool& prescribed_wind, const bool& use_moisture, const bool& disable_diagnostics,
const bool& use_cpstar, const int& transport_alg,
const int& dt_remap_factor, const int& dt_tracer_factor,
const double& scale_factor, const double& laplacian_rigid_factor)
Expand Down Expand Up @@ -90,7 +90,7 @@ void init_simulation_params_c (const int& remap_alg, const int& limiter_option,
params.hypervis_subcycle = hypervis_subcycle;
params.hypervis_scaling = hypervis_scaling;
params.disable_diagnostics = disable_diagnostics;
params.moisture = (moisture ? MoistDry::MOIST : MoistDry::DRY);
params.use_moisture = use_moisture;
params.use_cpstar = use_cpstar;
params.transport_alg = transport_alg;
// SphereOperators parameters; preqx supports only the sphere.
Expand Down
2 changes: 1 addition & 1 deletion components/homme/src/preqx_kokkos/cxx/prim_advance_exp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ void prim_advance_exp (TimeLevel& tl, const Real dt, const bool compute_diagnost

// Determine the tracers time level
tl.n0_qdp= -1;
if (params.moisture == MoistDry::MOIST) {
if (params.use_moisture) {
tl.update_tracers_levels(params.qsplit);
}

Expand Down
7 changes: 6 additions & 1 deletion components/homme/src/prim_main.F90
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ program prim_main
use element_mod, only: element_t
use common_io_mod, only: output_dir, infilenames
use common_movie_mod, only: nextoutputstep
use perf_mod, only: t_initf, t_prf, t_finalizef, t_startf, t_stopf ! _EXTERNAL
use perf_mod, only: t_initf, t_prf, t_finalizef, t_startf, t_stopf, t_disablef, t_enablef ! _EXTERNAL
use restart_io_mod , only: restartheader_t, writerestart
use hybrid_mod, only: hybrid_create
#if (defined MODEL_THETA_L && defined ARKODE)
Expand Down Expand Up @@ -240,6 +240,11 @@ end subroutine finalize_kokkos_f90

nstep = nextoutputstep(tl)
do while(tl%nstep<nstep)

if(tl%nstep < 2) then
call t_disablef()
endif
if(tl%nstep >= 2) call t_enablef()
call t_startf('prim_run')
call prim_run_subcycle(elem, hybrid,nets,nete, tstep, .false., tl, hvcoord,1)
call t_stopf('prim_run')
Expand Down
2 changes: 1 addition & 1 deletion components/homme/src/share/cxx/Config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# endif
#endif

#if ! defined HOMMEXX_CUDA_SPACE && ! defined HOMMEXX_OPENMP_SPACE && ! defined HOMMEXX_THREADS_SPACE && ! defined HOMMEXX_SERIAL_SPACE && ! defined HOMMEXX_HIP_SPACE
#if ! defined HOMMEXX_CUDA_SPACE && ! defined HOMMEXX_OPENMP_SPACE && ! defined HOMMEXX_THREADS_SPACE && ! defined HOMMEXX_SERIAL_SPACE && ! defined HOMMEXX_HIP_SPACE && ! defined HOMMEXX_SYCL_SPACE
# define HOMMEXX_DEFAULT_SPACE
#endif

Expand Down
Loading