Skip to content

Commit

Permalink
Disable diagonalization on the GPU with rocSOLVER for older ROCm versions (< v5.3.0) due to poor performance, and use CPU diagonalization routines instead.
Browse files Browse the repository at this point in the history
  • Loading branch information
ohearnk committed Dec 21, 2024
1 parent f98e306 commit 3724740
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 12 deletions.
28 changes: 18 additions & 10 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -175,13 +175,16 @@ if(CUDA)
if(HIP)
if(MAGMA)
copy_target(libquick libquick_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $<TARGET_OBJECTS:rocblas_fortran_obj> ${QUICK_GPU_FORTRAN_SOURCE})
target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM})
target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE WITH_MAGMA)
else()
elseif(${HIP_VERSION} VERSION_GREATER_EQUAL 5.3.0)
copy_target(libquick libquick_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $<TARGET_OBJECTS:rocblas_fortran_obj> $<TARGET_OBJECTS:rocsolver_obj> ${QUICK_GPU_FORTRAN_SOURCE})
target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM})
target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE WITH_ROCSOLVER)
# avoid diagonalization on GPU with rocSOLVER on older ROCm versions due to poor performance,
# and instead fall back to CPU diagonalization
else()
copy_target(libquick libquick_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $<TARGET_OBJECTS:rocblas_fortran_obj> ${QUICK_GPU_FORTRAN_SOURCE})
endif()
target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM})
target_link_options(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE ${GPU_LD_FLAGS})

set_target_properties(libquick_${QUICK_GPU_TARGET_NAME} PROPERTIES OUTPUT_NAME quick_${QUICK_GPU_TARGET_NAME})
Expand All @@ -193,9 +196,10 @@ if(CUDA)

if(MAGMA)
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE magma)
else()
elseif(${HIP_VERSION} VERSION_GREATER_EQUAL 5.3.0)
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE rocsolver)
#target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE lapack)
else()
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE lapack)
endif()

if(INSIDE_AMBER)
Expand Down Expand Up @@ -230,13 +234,16 @@ if(MPI AND CUDA)
if(HIP)
if(MAGMA)
copy_target(libquick_mpi libquick_mpi_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $<TARGET_OBJECTS:rocblas_fortran_obj_mpi> ${QUICK_GPU_FORTRAN_SOURCE})
target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE MPIV_GPU ${QUICK_GPU_PLATFORM}_MPIV)
target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE WITH_MAGMA)
else()
elseif(${HIP_VERSION} VERSION_GREATER_EQUAL 5.3.0)
copy_target(libquick_mpi libquick_mpi_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $<TARGET_OBJECTS:rocblas_fortran_obj_mpi> $<TARGET_OBJECTS:rocsolver_obj_mpi> ${QUICK_GPU_FORTRAN_SOURCE})
target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE MPIV_GPU ${QUICK_GPU_PLATFORM}_MPIV)
target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE WITH_ROCSOLVER)
# avoid diagonalization on GPU with rocSOLVER on older ROCm versions due to poor performance,
# and instead fall back to CPU diagonalization
else()
copy_target(libquick_mpi libquick_mpi_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $<TARGET_OBJECTS:rocblas_fortran_obj_mpi> ${QUICK_GPU_FORTRAN_SOURCE})
endif()
target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE MPIV_GPU ${QUICK_GPU_PLATFORM}_MPIV)
target_link_options(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE ${GPU_LD_FLAGS})

set_target_properties(libquick_mpi_${QUICK_GPU_TARGET_NAME} PROPERTIES OUTPUT_NAME quick_mpi_${QUICK_GPU_TARGET_NAME})
Expand All @@ -245,9 +252,10 @@ if(MPI AND CUDA)
target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE xc_gpu xc_${QUICK_GPU_TARGET_NAME} octree_gpu quick_${QUICK_GPU_TARGET_NAME}_kernels_mpi rocblas)
if(MAGMA)
target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE magma)
else()
elseif(${HIP_VERSION} VERSION_GREATER_EQUAL 5.3.0)
target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE rocsolver)
#target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE lapack)
else()
target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE lapack)
endif()
if(INSIDE_AMBER)
target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE libcew_mpi)
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ set_property(TARGET rocblas_fortran_obj PROPERTY POSITION_INDEPENDENT_CODE TRUE)
target_include_directories(rocblas_fortran_obj PUBLIC ${CUDA_INCLUDE_DIRS})
config_module_dirs(rocblas_fortran_obj quick/${QUICK_GPU_TARGET_NAME})

if(NOT MAGMA)
if(NOT MAGMA AND ${HIP_VERSION} VERSION_GREATER_EQUAL 5.3.0)
# rocsolver
#------------------------------------------------------------------------------------------
set(ROCSOLVER_SOURCE rocsolver/quick_rocsolver_module.f90 rocsolver/rocsolver_extra_module.f90)
Expand All @@ -57,7 +57,7 @@ if(MPI)
config_module_dirs(rocblas_fortran_obj_mpi quick/mpi_${QUICK_GPU_TARGET_NAME})
set_property(TARGET rocblas_fortran_obj_mpi PROPERTY COMPILE_OPTIONS ${OPT_FFLAGS})

if(NOT MAGMA)
if(NOT MAGMA AND ${HIP_VERSION} VERSION_GREATER_EQUAL 5.3.0)
copy_target(rocsolver_obj rocsolver_obj_mpi SWAP_SOURCES TO)
config_module_dirs(rocsolver_obj_mpi quick/mpi_${QUICK_GPU_TARGET_NAME})
set_property(TARGET rocsolver_obj_mpi PROPERTY COMPILE_OPTIONS ${OPT_FFLAGS})
Expand Down
7 changes: 7 additions & 0 deletions src/modules/quick_overlap_module.f90
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,13 @@ subroutine fullx
call magmaDIAG(nbasis, quick_scratch%hold, quick_scratch%Sminhalf, quick_scratch%hold2, IERROR)
#elif defined(WITH_ROCSOLVER)
call rocDIAG(nbasis, quick_scratch%hold, quick_scratch%Sminhalf, quick_scratch%hold2, IERROR)
#else
#if defined(LAPACK) || defined(MKL)
call DIAGMKL(nbasis, quick_scratch%hold, quick_scratch%Sminhalf, quick_scratch%hold2, IERROR)
#else
call DIAG(NBASIS, quick_scratch%hold, NBASIS,quick_method%DMCutoff, quick_scratch%V, quick_scratch%Sminhalf, &
quick_scratch%IDEGEN1, quick_scratch%hold2, IERROR)
#endif
#endif
#else
#if defined(LAPACK) || defined(MKL)
Expand Down
7 changes: 7 additions & 0 deletions src/modules/quick_scf_module.f90
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,13 @@ subroutine electdiis(jscf,ierr)
call magmaDIAG(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR)
#elif defined(WITH_ROCSOLVER)
call rocDIAG(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR)
#else
#if defined(LAPACK) || defined(MKL)
call DIAGMKL(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR)
#else
call DIAG(nbasis, quick_qm_struct%o, nbasis, quick_method%DMCutoff, V2, quick_qm_struct%E, &
quick_qm_struct%idegen, quick_qm_struct%vec, IERROR)
#endif
#endif
#else
#if defined(LAPACK) || defined(MKL)
Expand Down
14 changes: 14 additions & 0 deletions src/modules/quick_uscf_module.f90
Original file line number Diff line number Diff line change
Expand Up @@ -643,6 +643,13 @@ subroutine uelectdiis(jscf,ierr)
call magmaDIAG(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR)
#elif defined(WITH_ROCSOLVER)
call rocDIAG(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR)
#else
#if defined(LAPACK) || defined(MKL)
call DIAGMKL(nbasis, quick_qm_struct%o, quick_qm_struct%E, quick_qm_struct%vec, IERROR)
#else
call DIAG(nbasis, quick_qm_struct%o, nbasis, quick_method%DMCutoff, V2, quick_qm_struct%E, &
quick_qm_struct%idegen, quick_qm_struct%vec, IERROR)
#endif
#endif
#else
#if defined(LAPACK) || defined(MKL)
Expand Down Expand Up @@ -744,6 +751,13 @@ subroutine uelectdiis(jscf,ierr)
call magmaDIAG(nbasis,quick_qm_struct%ob,quick_qm_struct%EB,quick_qm_struct%vec,IERROR)
#elif defined(WITH_ROCSOLVER)
call rocDIAG(nbasis,quick_qm_struct%ob,quick_qm_struct%EB,quick_qm_struct%vec,IERROR)
#else
#if defined(LAPACK) || defined(MKL)
call DIAGMKL(nbasis,quick_qm_struct%ob,quick_qm_struct%EB,quick_qm_struct%vec,IERROR)
#else
call DIAG(nbasis,quick_qm_struct%ob,nbasis,quick_method%DMCutoff,V2,quick_qm_struct%EB,&
quick_qm_struct%idegen,quick_qm_struct%vec,IERROR)
#endif
#endif
#else
#if defined(LAPACK) || defined(MKL)
Expand Down

0 comments on commit 3724740

Please sign in to comment.