From c0724fc9150329d42abaf2f0f77dc8ca91d48acb Mon Sep 17 00:00:00 2001
From: alexeykondrat <143633163+alexeykondrat@users.noreply.github.com>
Date: Sat, 18 May 2024 01:09:11 -0400
Subject: [PATCH] [ROCm][Hardware][AMD] Adding Navi21 to fallback to naive
 attention if Triton is not used (#4658)

---
 vllm/attention/backends/rocm_flash_attn.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/backends/rocm_flash_attn.py b/vllm/attention/backends/rocm_flash_attn.py
index bb828d6fc04fe..94f3f55636ed6 100644
--- a/vllm/attention/backends/rocm_flash_attn.py
+++ b/vllm/attention/backends/rocm_flash_attn.py
@@ -231,8 +231,9 @@ def __init__(
             self.attn_func = triton_attention
             logger.debug("Using Triton FA in ROCmBackend")
         else:
-            # if not using triton, navi3x not use flash-attn either
-            if torch.cuda.get_device_capability()[0] == 11:
+            # if not using triton, navi3x/navi21/navi10 do not use flash-attn
+            # either
+            if torch.cuda.get_device_capability()[0] != 9:
                 self.use_naive_attn = True
             else:
                 try:
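
Note (not part of the patch): a minimal, hypothetical sketch of the capability-based fallback this change introduces. On ROCm builds of PyTorch, torch.cuda.get_device_capability() reports the gfx major version, so gfx9 (MI/CDNA) GPUs report 9 while Navi10/Navi21/Navi3x (RDNA) report 10 or 11. The helper name below is illustrative and does not appear in vLLM.

import torch

def should_use_naive_attention() -> bool:
    # Hypothetical helper mirroring the patched check: only gfx9 (MI-series)
    # GPUs keep the flash-attn path; Navi10/Navi21/Navi3x and any other
    # non-gfx9 architecture fall back to naive attention.
    major, _minor = torch.cuda.get_device_capability()
    return major != 9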