From 37247402e9a97086fafe15182c4ca7d85c3b3843 Mon Sep 17 00:00:00 2001 From: "Kurt A. O'Hearn" <1138669+ohearnk@users.noreply.github.com> Date: Sat, 21 Dec 2024 18:56:33 -0500 Subject: [PATCH] Disable diagonalization on the GPU with rocSOLVER for older ROCm version (< v5.3.0) due to poor performance and use CPU diagonalization routines instead. --- src/CMakeLists.txt | 28 ++++++++++++++++++---------- src/gpu/hip/CMakeLists.txt | 4 ++-- src/modules/quick_overlap_module.f90 | 7 +++++++ src/modules/quick_scf_module.f90 | 7 +++++++ src/modules/quick_uscf_module.f90 | 14 ++++++++++++++ 5 files changed, 48 insertions(+), 12 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f81fe6a..4eb8179f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -175,13 +175,16 @@ if(CUDA) if(HIP) if(MAGMA) copy_target(libquick libquick_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $ ${QUICK_GPU_FORTRAN_SOURCE}) - target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM}) target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE WITH_MAGMA) - else() + elseif("${HIP_VERSION}" VERSION_GREATER_EQUAL 5.3.0) copy_target(libquick libquick_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $ $ ${QUICK_GPU_FORTRAN_SOURCE}) - target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM}) target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE WITH_ROCSOLVER) + # avoid diagonalization on GPU with rocSOLVER on older ROCm versions due to poor performance, + # and instead fall back to CPU diagonalization (also taken when HIP_VERSION is unset or empty) + else() + copy_target(libquick libquick_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $ ${QUICK_GPU_FORTRAN_SOURCE}) endif() + target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM}) target_link_options(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE ${GPU_LD_FLAGS}) set_target_properties(libquick_${QUICK_GPU_TARGET_NAME} PROPERTIES OUTPUT_NAME 
quick_${QUICK_GPU_TARGET_NAME}) @@ -193,9 +196,10 @@ if(CUDA) if(MAGMA) target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE magma) - else() + elseif("${HIP_VERSION}" VERSION_GREATER_EQUAL 5.3.0) target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE rocsolver) - #target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE lapack) + else() + target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE lapack) endif() if(INSIDE_AMBER) @@ -230,13 +234,16 @@ if(MPI AND CUDA) if(HIP) if(MAGMA) copy_target(libquick_mpi libquick_mpi_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $ ${QUICK_GPU_FORTRAN_SOURCE}) - target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE MPIV_GPU ${QUICK_GPU_PLATFORM}_MPIV) target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE WITH_MAGMA) - else() + elseif("${HIP_VERSION}" VERSION_GREATER_EQUAL 5.3.0) copy_target(libquick_mpi libquick_mpi_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $ $ ${QUICK_GPU_FORTRAN_SOURCE}) - target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE MPIV_GPU ${QUICK_GPU_PLATFORM}_MPIV) target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE WITH_ROCSOLVER) + # avoid diagonalization on GPU with rocSOLVER on older ROCm versions due to poor performance, + # and instead fall back to CPU diagonalization (also taken when HIP_VERSION is unset or empty) + else() + copy_target(libquick_mpi libquick_mpi_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $ ${QUICK_GPU_FORTRAN_SOURCE}) endif() + target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE MPIV_GPU ${QUICK_GPU_PLATFORM}_MPIV) target_link_options(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE ${GPU_LD_FLAGS}) set_target_properties(libquick_mpi_${QUICK_GPU_TARGET_NAME} PROPERTIES OUTPUT_NAME quick_mpi_${QUICK_GPU_TARGET_NAME}) @@ -245,9 +252,10 @@ if(MPI AND CUDA) target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE xc_gpu xc_${QUICK_GPU_TARGET_NAME} octree_gpu quick_${QUICK_GPU_TARGET_NAME}_kernels_mpi rocblas) 
if(MAGMA) target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE magma) - else() + elseif("${HIP_VERSION}" VERSION_GREATER_EQUAL 5.3.0) target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE rocsolver) - #target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE lapack) + else() + target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE lapack) endif() if(INSIDE_AMBER) target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE libcew_mpi) diff --git a/src/gpu/hip/CMakeLists.txt b/src/gpu/hip/CMakeLists.txt index 6a445b95..567fc3e7 100644 --- a/src/gpu/hip/CMakeLists.txt +++ b/src/gpu/hip/CMakeLists.txt @@ -37,7 +37,7 @@ set_property(TARGET rocblas_fortran_obj PROPERTY POSITION_INDEPENDENT_CODE TRUE) target_include_directories(rocblas_fortran_obj PUBLIC ${CUDA_INCLUDE_DIRS}) config_module_dirs(rocblas_fortran_obj quick/${QUICK_GPU_TARGET_NAME}) -if(NOT MAGMA) +if(NOT MAGMA AND "${HIP_VERSION}" VERSION_GREATER_EQUAL 5.3.0) # rocsolver #------------------------------------------------------------------------------------------ set(ROCSOLVER_SOURCE rocsolver/quick_rocsolver_module.f90 rocsolver/rocsolver_extra_module.f90) @@ -57,7 +57,7 @@ if(MPI) config_module_dirs(rocblas_fortran_obj_mpi quick/mpi_${QUICK_GPU_TARGET_NAME}) set_property(TARGET rocblas_fortran_obj_mpi PROPERTY COMPILE_OPTIONS ${OPT_FFLAGS}) - if(NOT MAGMA) + if(NOT MAGMA AND "${HIP_VERSION}" VERSION_GREATER_EQUAL 5.3.0) copy_target(rocsolver_obj rocsolver_obj_mpi SWAP_SOURCES TO) config_module_dirs(rocsolver_obj_mpi quick/mpi_${QUICK_GPU_TARGET_NAME}) set_property(TARGET rocsolver_obj_mpi PROPERTY COMPILE_OPTIONS ${OPT_FFLAGS}) diff --git a/src/modules/quick_overlap_module.f90 b/src/modules/quick_overlap_module.f90 index d25d5aa0..026e27f5 100644 --- a/src/modules/quick_overlap_module.f90 +++ b/src/modules/quick_overlap_module.f90 @@ -251,6 +251,13 @@ subroutine fullx call magmaDIAG(nbasis, quick_scratch%hold, quick_scratch%Sminhalf, quick_scratch%hold2, 
IERROR) #elif defined(WITH_ROCSOLVER) call rocDIAG(nbasis, quick_scratch%hold, quick_scratch%Sminhalf, quick_scratch%hold2, IERROR) +#else +#if defined(LAPACK) || defined(MKL) + call DIAGMKL(nbasis, quick_scratch%hold, quick_scratch%Sminhalf, quick_scratch%hold2, IERROR) +#else + call DIAG(NBASIS, quick_scratch%hold, NBASIS,quick_method%DMCutoff, quick_scratch%V, quick_scratch%Sminhalf, & + quick_scratch%IDEGEN1, quick_scratch%hold2, IERROR) +#endif #endif #else #if defined(LAPACK) || defined(MKL) diff --git a/src/modules/quick_scf_module.f90 b/src/modules/quick_scf_module.f90 index efec8cce..c90ac8af 100644 --- a/src/modules/quick_scf_module.f90 +++ b/src/modules/quick_scf_module.f90 @@ -633,6 +633,13 @@ subroutine electdiis(jscf,ierr) call magmaDIAG(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR) #elif defined(WITH_ROCSOLVER) call rocDIAG(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR) +#else +#if defined(LAPACK) || defined(MKL) + call DIAGMKL(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR) +#else + call DIAG(nbasis, quick_qm_struct%o, nbasis, quick_method%DMCutoff, V2, quick_qm_struct%E, & + quick_qm_struct%idegen, quick_qm_struct%vec, IERROR) +#endif #endif #else #if defined(LAPACK) || defined(MKL) diff --git a/src/modules/quick_uscf_module.f90 b/src/modules/quick_uscf_module.f90 index 605b1c71..8d62f235 100644 --- a/src/modules/quick_uscf_module.f90 +++ b/src/modules/quick_uscf_module.f90 @@ -643,6 +643,13 @@ subroutine uelectdiis(jscf,ierr) call magmaDIAG(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR) #elif defined(WITH_ROCSOLVER) call rocDIAG(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR) +#else +#if defined(LAPACK) || defined(MKL) + call DIAGMKL(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR) +#else + call DIAG(nbasis, quick_qm_struct%o, nbasis, quick_method%DMCutoff, V2, quick_qm_struct%E, & + 
quick_qm_struct%idegen, quick_qm_struct%vec, IERROR) +#endif #endif #else #if defined(LAPACK) || defined(MKL) @@ -744,6 +751,13 @@ subroutine uelectdiis(jscf,ierr) call magmaDIAG(nbasis,quick_qm_struct%ob,quick_qm_struct%EB,quick_qm_struct%vec,IERROR) #elif defined(WITH_ROCSOLVER) call rocDIAG(nbasis,quick_qm_struct%ob,quick_qm_struct%EB,quick_qm_struct%vec,IERROR) +#else +#if defined(LAPACK) || defined(MKL) + call DIAGMKL(nbasis,quick_qm_struct%ob,quick_qm_struct%EB,quick_qm_struct%vec,IERROR) +#else + call DIAG(nbasis,quick_qm_struct%ob,nbasis,quick_method%DMCutoff,V2,quick_qm_struct%EB,& + quick_qm_struct%idegen,quick_qm_struct%vec,IERROR) +#endif #endif #else #if defined(LAPACK) || defined(MKL)