[ROCm][Hardware][AMD] Adding Navi21 to fallback to naive attention if Triton is not used (vllm-project#4658)
GeauxEric · May 18, 2024 · c0724fc · c0724fc
1 parent 86b45ae
commit c0724fc
Showing 1 changed file with 3 additions and 2 deletions.
diff --git a/vllm/attention/backends/rocm_flash_attn.py b/vllm/attention/backends/rocm_flash_attn.py
@@ -231,8 +231,9 @@ def __init__(
             self.attn_func = triton_attention
             logger.debug("Using Triton FA in ROCmBackend")
         else:
-            # if not using triton, navi3x not use flash-attn either
-            if torch.cuda.get_device_capability()[0] == 11:
+            # if not using triton, navi3x/navi21/navi10 do not use flash-attn
+            # either
+            if torch.cuda.get_device_capability()[0] != 9:
                 self.use_naive_attn = True
             else:
                 try: