diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a6c1fb14b2a9..7b24c4abc650e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -482,6 +482,17 @@ if (NOT VLLM_TARGET_DEVICE STREQUAL "cuda") return() endif () +# vLLM flash attention requires VLLM_GPU_ARCHES to contain the set of target +# arches in the CMake syntax (75-real, 89-virtual, etc), since we clear the +# arches in the CUDA case (and instead set the gencodes on a per file basis) +# we need to manually set VLLM_GPU_ARCHES here. +if(VLLM_GPU_LANG STREQUAL "CUDA") + foreach(_ARCH ${CUDA_ARCHS}) + string(REPLACE "." "" _ARCH "${_ARCH}") + list(APPEND VLLM_GPU_ARCHES "${_ARCH}-real") + endforeach() +endif() + # # Build vLLM flash attention from source #