Skip to content

Commit

Permalink
Add wrappers for GPU functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
ohearnk committed Dec 9, 2024
1 parent 9eb63a4 commit 53c25af
Show file tree
Hide file tree
Showing 49 changed files with 6,387 additions and 15,763 deletions.
3 changes: 1 addition & 2 deletions quick-cmake/QUICKCudaConfig.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ set(QUICK_GPU_TARGET_NAME "cuda")
set(GPU_LD_FLAGS "") # hipcc requires special flags for linking (see below)

if(CUDA)

find_package(CUDA REQUIRED)

if(NOT CUDA_FOUND)
Expand Down Expand Up @@ -266,7 +265,7 @@ if(CUDA)
endif()

# extra CUDA flags
list(APPEND CUDA_NVCC_FLAGS -use_fast_math)
list(APPEND CUDA_NVCC_FLAGS --use_fast_math)

if(TARGET_LINUX OR TARGET_OSX)
list(APPEND CUDA_NVCC_FLAGS --compiler-options -fPIC)
Expand Down
221 changes: 116 additions & 105 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ if(lapack_INTERNAL AND NOT INSIDE_AMBER)
endif()

if(CUDA OR HIP)
add_subdirectory(gpu/${QUICK_GPU_TARGET_NAME})
add_subdirectory(gpu/${QUICK_GPU_TARGET_NAME})
endif()

# quick source lists
Expand All @@ -31,7 +31,7 @@ set(QUICK_MODULES_SOURCES
quick_eri_module.f90 quick_eri_grad_module.f90 quick_oei_module.f90
quick_dft_module.f90 quick_scf_operator_module.f90 quick_uscf_operator_module.f90
quick_uscf_module.f90 quick_sad_guess_module.f90 quick_optimizer_module.f90
quick_cew_module.f90 quick_lri_module.f90 quick_lri_grad_module.f90
quick_cew_module.f90 quick_lri_module.f90 quick_lri_grad_module.f90
oshell_quick_eri_module.f90 oshell_quick_eri_grad_module.f90
oshell_quick_gradient_module.f90 quick_dftd3_module.f90 quick_molden_module.f90)

Expand All @@ -55,38 +55,38 @@ set(QUICK_SUBS_SOURCES Angles.f90 copyDMat.f90 copySym.f90
oshell_denspt.f90 naive_distribute.f90 time.cpp)

set(DLFIND_MODULES_SOURCES
dlf_allocate.f90 dlf_checkpoint.f90
dlf_conint.f90 dlf_convergence.f90
dlf_coords.f90 dlf_dimer.f90
dlf_formstep.f90 dlf_global_module.f90
dlf_hdlc_constraint.f90 dlf_hdlc_hdlclib.f90
dlf_hdlc_interface.f90 dlf_hdlc_matrixlib.f90
dlf_hdlc_primitive.f90 dl_find.f90
dlfind_main_driver.f90
dlf_lbfgs.f90 dlf_linalg.f90
dlf_microiter.f90
dlf_neb.f90 dlf_parallel_opt.f90
dlf_qts.f90 dlf_scalestep.f90
dlf_serial.f90 dlf_sort.f90
dlf_stat_module.f90 dlf_svnversion.f90
dlf_task.f90 dlf_time.f90 dlf_util.f90)
dlf_allocate.f90 dlf_checkpoint.f90
dlf_conint.f90 dlf_convergence.f90
dlf_coords.f90 dlf_dimer.f90
dlf_formstep.f90 dlf_global_module.f90
dlf_hdlc_constraint.f90 dlf_hdlc_hdlclib.f90
dlf_hdlc_interface.f90 dlf_hdlc_matrixlib.f90
dlf_hdlc_primitive.f90 dl_find.f90
dlfind_main_driver.f90
dlf_lbfgs.f90 dlf_linalg.f90
dlf_microiter.f90
dlf_neb.f90 dlf_parallel_opt.f90
dlf_qts.f90 dlf_scalestep.f90
dlf_serial.f90 dlf_sort.f90
dlf_stat_module.f90 dlf_svnversion.f90
dlf_task.f90 dlf_time.f90 dlf_util.f90)

set(DLFIND_MODULES_SOURCES
dlf_allocate.f90 dlf_checkpoint.f90
dlf_conint.f90 dlf_convergence.f90
dlf_coords.f90 dlf_dimer.f90
dlf_formstep.f90 dlf_global_module.f90
dlf_hdlc_constraint.f90 dlf_hdlc_hdlclib.f90
dlf_hdlc_interface.f90 dlf_hdlc_matrixlib.f90
dlf_hdlc_primitive.f90 dl_find.f90
dlfind_main_driver.f90
dlf_lbfgs.f90 dlf_linalg.f90
dlf_microiter.f90
dlf_neb.f90 dlf_parallel_opt.f90
dlf_qts.f90 dlf_scalestep.f90
dlf_serial.f90 dlf_sort.f90
dlf_stat_module.f90 dlf_svnversion.f90
dlf_task.f90 dlf_time.f90 dlf_util.f90)
dlf_allocate.f90 dlf_checkpoint.f90
dlf_conint.f90 dlf_convergence.f90
dlf_coords.f90 dlf_dimer.f90
dlf_formstep.f90 dlf_global_module.f90
dlf_hdlc_constraint.f90 dlf_hdlc_hdlclib.f90
dlf_hdlc_interface.f90 dlf_hdlc_matrixlib.f90
dlf_hdlc_primitive.f90 dl_find.f90
dlfind_main_driver.f90
dlf_lbfgs.f90 dlf_linalg.f90
dlf_microiter.f90
dlf_neb.f90 dlf_parallel_opt.f90
dlf_qts.f90 dlf_scalestep.f90
dlf_serial.f90 dlf_sort.f90
dlf_stat_module.f90 dlf_svnversion.f90
dlf_task.f90 dlf_time.f90 dlf_util.f90)

set(DFTD3_SOURCES
api.f90 common.f90 core.f90 pars.f90 sizes.f90)
Expand All @@ -96,9 +96,9 @@ if(mirp_ENABLED)
endif()

set(QUICK_GENERAL_SOURCES initialize.f90 read_job_and_atom.f90
getMol.f90 quick_one_electron_integral.f90 getEnergy.f90
ecp.f90 nuclear.f90 finalize.f90 mpi_setup.f90 quick_debug.f90
calMP2.f90 hessian.f90 CPHF.f90 frequency.f90 basis.f90)
getMol.f90 quick_one_electron_integral.f90 getEnergy.f90
ecp.f90 nuclear.f90 finalize.f90 mpi_setup.f90 quick_debug.f90
calMP2.f90 hessian.f90 CPHF.f90 frequency.f90 basis.f90)

set(QUICK_GPU_FORTRAN_SOURCE gpu/${QUICK_GPU_TARGET_NAME}/gpu_write_info.f90)

Expand All @@ -123,11 +123,11 @@ foreach(SUBS_SOURCE ${QUICK_SUBS_SOURCES})
endforeach()

foreach(DLFIND_SOURCE ${DLFIND_MODULES_SOURCES})
list(APPEND DLFIND_MODULES_SOURCES_FULLPATH "dlfind/${DLFIND_SOURCE}")
list(APPEND DLFIND_MODULES_SOURCES_FULLPATH "dlfind/${DLFIND_SOURCE}")
endforeach()

foreach(DFTD3_SOURCE ${DFTD3_SOURCES})
list(APPEND DFTD3_SOURCES_FULLPATH "dftd3/${DFTD3_SOURCE}")
list(APPEND DFTD3_SOURCES_FULLPATH "dftd3/${DFTD3_SOURCE}")
endforeach()

# create serial and MPI libraries
Expand All @@ -141,7 +141,7 @@ config_module_dirs(libquick quick/serial libxc/serial)

target_link_libraries(libquick PRIVATE xc octree ${QUICK_LAPACK} ${QUICK_BLAS})
if(INSIDE_AMBER)
target_link_libraries(libquick PRIVATE libcew)
target_link_libraries(libquick PRIVATE libcew)
endif()
install_libraries(libquick EXPORT QUICK)

Expand All @@ -151,28 +151,28 @@ if(mirp_ENABLED)
endif()

# Name the output "quick" so the library file is "libquick" rather than "liblibquick"
set_property(TARGET libquick PROPERTY OUTPUT_NAME quick)
set_target_properties(libquick PROPERTIES OUTPUT_NAME quick)

if(MPI)
make_mpi_version(libquick libquick_mpi LANGUAGES Fortran)

target_compile_definitions(libquick_mpi PRIVATE MPIV)
set_property(TARGET libquick_mpi PROPERTY OUTPUT_NAME quick_mpi)
set_target_properties(libquick_mpi PROPERTIES OUTPUT_NAME quick_mpi)

# change link libraries and mod dirs for MPI
remove_link_libraries(libquick_mpi octree)
target_link_libraries(libquick_mpi PRIVATE octree_mpi)
if(INSIDE_AMBER)
target_link_libraries(libquick_mpi PRIVATE libcew_mpi)
endif()
target_link_libraries(libquick_mpi PRIVATE libcew_mpi)
endif()
config_module_dirs(libquick_mpi quick/mpi libxc/serial)


install_libraries(libquick_mpi EXPORT QUICK)
endif()

if(CUDA)
if(HIP)
if(HIP)
if(MAGMA)
copy_target(libquick libquick_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $<TARGET_OBJECTS:rocblas_fortran_obj> ${QUICK_GPU_FORTRAN_SOURCE})
target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM})
Expand All @@ -182,52 +182,56 @@ if(CUDA)
target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM})
target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE WITH_ROCSOLVER)
endif()
target_link_options(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE ${GPU_LD_FLAGS})
set_property(TARGET libquick_${QUICK_GPU_TARGET_NAME} PROPERTY OUTPUT_NAME quick_${QUICK_GPU_TARGET_NAME})
# change link libraries and mod dirs for CUDA
remove_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} xc octree)
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE xc_gpu octree_gpu quick_${QUICK_GPU_TARGET_NAME}_kernels rocblas)

if(MAGMA)
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE magma)
else()
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE rocsolver)
target_link_options(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE ${GPU_LD_FLAGS})

set_target_properties(libquick_${QUICK_GPU_TARGET_NAME} PROPERTIES OUTPUT_NAME quick_${QUICK_GPU_TARGET_NAME})

# change link libraries and mod dirs for the GPU (HIP) build
remove_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} xc octree)

target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE xc_gpu octree_gpu quick_${QUICK_GPU_TARGET_NAME}_kernels rocblas)

if(MAGMA)
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE magma)
else()
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE rocsolver)
#target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE lapack)
endif()

if(INSIDE_AMBER)
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE libcew)
endif()

config_module_dirs(libquick_${QUICK_GPU_TARGET_NAME} quick/${QUICK_GPU_TARGET_NAME} libxc/gpu)

install_libraries(libquick_${QUICK_GPU_TARGET_NAME} EXPORT QUICK)
else()
copy_target(libquick libquick_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $<TARGET_OBJECTS:cublas_fortran_obj> $<TARGET_OBJECTS:cusolver_obj> ${QUICK_GPU_FORTRAN_SOURCE})
target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM})

set_property(TARGET libquick_${QUICK_GPU_TARGET_NAME} PROPERTY OUTPUT_NAME quick_${QUICK_GPU_TARGET_NAME})

# change link libraries and mod dirs for CUDA
remove_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} xc octree)

target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE xc_gpu octree_gpu quick_${QUICK_GPU_TARGET_NAME}_kernels cublas cusolver)

if(INSIDE_AMBER)
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE libcew)
endif()

config_module_dirs(libquick_${QUICK_GPU_TARGET_NAME} quick/${QUICK_GPU_TARGET_NAME} libxc/gpu)

install_libraries(libquick_${QUICK_GPU_TARGET_NAME} EXPORT QUICK)
endif()
endif()

if(INSIDE_AMBER)
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE libcew)
endif()

config_module_dirs(libquick_${QUICK_GPU_TARGET_NAME} quick/${QUICK_GPU_TARGET_NAME} libxc/gpu)

install_libraries(libquick_${QUICK_GPU_TARGET_NAME} EXPORT QUICK)
else()
copy_target(libquick libquick_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $<TARGET_OBJECTS:cublas_fortran_obj> $<TARGET_OBJECTS:cusolver_obj> ${QUICK_GPU_FORTRAN_SOURCE})
target_compile_definitions(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM})
set_target_properties(libquick_${QUICK_GPU_TARGET_NAME} PROPERTIES OUTPUT_NAME quick_${QUICK_GPU_TARGET_NAME})
# target_compile_options(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
# --use_fast_math
# --relocatable-device-code=true
# >)

# change link libraries and mod dirs for CUDA
remove_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} xc octree)

target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE xc_gpu octree_gpu
quick_${QUICK_GPU_TARGET_NAME}_kernels cublas cusolver)

if(INSIDE_AMBER)
target_link_libraries(libquick_${QUICK_GPU_TARGET_NAME} PRIVATE libcew)
endif()

config_module_dirs(libquick_${QUICK_GPU_TARGET_NAME} quick/${QUICK_GPU_TARGET_NAME} libxc/gpu)

install_libraries(libquick_${QUICK_GPU_TARGET_NAME} EXPORT QUICK)
endif()
endif()

if(MPI AND CUDA)
if(HIP)
if(HIP)
if(MAGMA)
copy_target(libquick_mpi libquick_mpi_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $<TARGET_OBJECTS:rocblas_fortran_obj_mpi> ${QUICK_GPU_FORTRAN_SOURCE})
target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE MPIV_GPU ${QUICK_GPU_PLATFORM}_MPIV)
Expand All @@ -238,7 +242,8 @@ if(MPI AND CUDA)
target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE WITH_ROCSOLVER)
endif()
target_link_options(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE ${GPU_LD_FLAGS})
set_property(TARGET libquick_mpi_${QUICK_GPU_TARGET_NAME} PROPERTY OUTPUT_NAME quick_mpi_${QUICK_GPU_TARGET_NAME})

set_target_properties(libquick_mpi_${QUICK_GPU_TARGET_NAME} PROPERTIES OUTPUT_NAME quick_mpi_${QUICK_GPU_TARGET_NAME})

remove_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} octree_mpi xc)
target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE xc_gpu xc_${QUICK_GPU_TARGET_NAME} octree_gpu quick_${QUICK_GPU_TARGET_NAME}_kernels_mpi rocblas)
Expand All @@ -249,27 +254,34 @@ if(MPI AND CUDA)
#target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE lapack)
endif()
if(INSIDE_AMBER)
target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE libcew_mpi)
target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE libcew_mpi)
endif()

config_module_dirs(libquick_mpi_${QUICK_GPU_TARGET_NAME} quick/mpi_${QUICK_GPU_TARGET_NAME} libxc/gpu)

install_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} EXPORT QUICK)
else()
else()
copy_target(libquick_mpi libquick_mpi_${QUICK_GPU_TARGET_NAME} SWAP_SOURCES TO $<TARGET_OBJECTS:cublas_fortran_obj> $<TARGET_OBJECTS:cusolver_obj> ${QUICK_GPU_FORTRAN_SOURCE})
target_compile_definitions(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE MPIV_GPU ${QUICK_GPU_PLATFORM}_MPIV)
set_property(TARGET libquick_mpi_${QUICK_GPU_TARGET_NAME} PROPERTY OUTPUT_NAME quick_mpi_${QUICK_GPU_TARGET_NAME})
set_target_properties(libquick_mpi_${QUICK_GPU_TARGET_NAME} PROPERTIES OUTPUT_NAME quick_mpi_${QUICK_GPU_TARGET_NAME})
# target_compile_options(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
# --use_fast_math
# --relocatable-device-code=true
# >)

remove_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} octree_mpi xc)
target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE xc_gpu xc_${QUICK_GPU_TARGET_NAME} octree_gpu quick_${QUICK_GPU_TARGET_NAME}_kernels_mpi cublas cusolver)

target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE xc_gpu xc_${QUICK_GPU_TARGET_NAME} octree_gpu
quick_${QUICK_GPU_TARGET_NAME}_kernels_mpi cublas cusolver)

if(INSIDE_AMBER)
target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE libcew_mpi)
target_link_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} PRIVATE libcew_mpi)
endif()

config_module_dirs(libquick_mpi_${QUICK_GPU_TARGET_NAME} quick/mpi_${QUICK_GPU_TARGET_NAME} libxc/gpu)

install_libraries(libquick_mpi_${QUICK_GPU_TARGET_NAME} EXPORT QUICK)
endif()
endif()
endif()

# Enable -DLAPACK or -DMKL for non-CUDA versions
Expand Down Expand Up @@ -322,23 +334,22 @@ if(MPI)
endif()

if(CUDA)
copy_target(quick quick.${QUICK_GPU_TARGET_NAME})
target_compile_definitions(quick.${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM})

copy_target(quick quick.${QUICK_GPU_TARGET_NAME})
target_compile_definitions(quick.${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM})

copy_target(test-api test-api.${QUICK_GPU_TARGET_NAME})
target_compile_definitions(test-api.${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM})
copy_target(test-api test-api.${QUICK_GPU_TARGET_NAME})
target_compile_definitions(test-api.${QUICK_GPU_TARGET_NAME} PRIVATE GPU ${QUICK_GPU_PLATFORM})

remove_link_libraries(quick.${QUICK_GPU_TARGET_NAME} libquick)
target_link_libraries(quick.${QUICK_GPU_TARGET_NAME} libquick_${QUICK_GPU_TARGET_NAME})
config_module_dirs(quick.${QUICK_GPU_TARGET_NAME} quick/${QUICK_GPU_TARGET_NAME} libxc/gpu)
remove_link_libraries(quick.${QUICK_GPU_TARGET_NAME} libquick)
target_link_libraries(quick.${QUICK_GPU_TARGET_NAME} libquick_${QUICK_GPU_TARGET_NAME})
config_module_dirs(quick.${QUICK_GPU_TARGET_NAME} quick/${QUICK_GPU_TARGET_NAME} libxc/gpu)

remove_link_libraries(test-api.${QUICK_GPU_TARGET_NAME} libquick)
target_link_libraries(test-api.${QUICK_GPU_TARGET_NAME} libquick_${QUICK_GPU_TARGET_NAME})
config_module_dirs(test-api.${QUICK_GPU_TARGET_NAME} quick/${QUICK_GPU_TARGET_NAME} libxc/gpu)
remove_link_libraries(test-api.${QUICK_GPU_TARGET_NAME} libquick)
target_link_libraries(test-api.${QUICK_GPU_TARGET_NAME} libquick_${QUICK_GPU_TARGET_NAME})
config_module_dirs(test-api.${QUICK_GPU_TARGET_NAME} quick/${QUICK_GPU_TARGET_NAME} libxc/gpu)

install(TARGETS quick.${QUICK_GPU_TARGET_NAME} DESTINATION ${BINDIR} EXPORT QUICK)
install(TARGETS test-api.${QUICK_GPU_TARGET_NAME} DESTINATION ${BINDIR} EXPORT QUICK)
install(TARGETS quick.${QUICK_GPU_TARGET_NAME} DESTINATION ${BINDIR} EXPORT QUICK)
install(TARGETS test-api.${QUICK_GPU_TARGET_NAME} DESTINATION ${BINDIR} EXPORT QUICK)
endif()

if(CUDA AND MPI)
Expand Down
2 changes: 1 addition & 1 deletion src/gpu/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
set(QUICK_CUDA_SOURCE gpu.cu gpu_type.cu gpu_get2e.cu gpu_oei.cu gpu_lri.cu ../xc_redistribute.cpp gpu_get2e_grad_ffff.cu)
set(QUICK_CUDA_SOURCE gpu_utils.cu gpu.cu gpu_type.cu gpu_get2e.cu ../gpu_oei.cu ../gpu_lri.cu ../xc_redistribute.cpp gpu_get2e_grad_ffff.cu)

# Sadly, FindCUDA doesn't support OBJECT libraries, so we have to use a static cuda_add_library.
cuda_add_library(quick_cuda_kernels STATIC ${QUICK_CUDA_SOURCE} OPTIONS ${CUDA_DEVICE_CODE_FLAGS} -DGPU -D${QUICK_GPU_PLATFORM})
Expand Down
Loading

0 comments on commit 53c25af

Please sign in to comment.