Skip to content

Commit

Permalink
[Kernel][Hardware][Amd]Custom paged attention kernel for rocm (vllm-p…
Browse files Browse the repository at this point in the history
  • Loading branch information
charlifu authored and Jeffwan committed Sep 19, 2024
1 parent f1bad16 commit 4e900f2
Show file tree
Hide file tree
Showing 8 changed files with 1,371 additions and 16 deletions.
23 changes: 23 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -324,12 +324,35 @@ define_gpu_extension_target(
WITH_SOABI)


if(VLLM_GPU_LANG STREQUAL "HIP")
  # ---------------------------------------------------------------------------
  # _rocm_C extension
  #
  # Only configured when the GPU language is HIP. The extension is built from
  # the sources under csrc/rocm/ listed below.
  # ---------------------------------------------------------------------------
  set(VLLM_ROCM_EXT_SRC
    "csrc/rocm/torch_bindings.cpp"
    "csrc/rocm/attention.cu"
  )

  # The compile flags, architecture list and language are forwarded from the
  # VLLM_GPU_* variables, which are set outside this section.
  # NOTE(review): USE_SABI / WITH_SOABI are interpreted by
  # define_gpu_extension_target (defined elsewhere); presumably they select the
  # Python stable-ABI level and the SOABI filename suffix -- confirm there.
  define_gpu_extension_target(
    _rocm_C
    DESTINATION vllm
    LANGUAGE ${VLLM_GPU_LANG}
    SOURCES ${VLLM_ROCM_EXT_SRC}
    COMPILE_FLAGS ${VLLM_GPU_FLAGS}
    ARCHITECTURES ${VLLM_GPU_ARCHES}
    USE_SABI 3
    WITH_SOABI
  )
endif()


# Hook the _C and _moe_C extension targets into the `default` target when the
# GPU language is either HIP or CUDA.
if(VLLM_GPU_LANG STREQUAL "HIP" OR VLLM_GPU_LANG STREQUAL "CUDA")
  # Core extension.
  message(STATUS "Enabling C extension.")
  add_dependencies(default _C)

  # MoE extension.
  message(STATUS "Enabling moe extension.")
  add_dependencies(default _moe_C)
endif()

# The _rocm_C extension is only added to the `default` target for HIP builds.
if(VLLM_GPU_LANG STREQUAL "HIP")
  message(STATUS "Enabling rocm extension.")
  add_dependencies(
    default
    _rocm_C
  )
endif()
Loading

0 comments on commit 4e900f2

Please sign in to comment.