Fix default value for FSDPA (#448)

HabanaAI · Oct 30, 2024 · 94858b5
1 parent 2f7f963
commit 94858b5
Showing 1 changed file with 6 additions and 4 deletions.
diff --git a/vllm/attention/backends/hpu_attn.py b/vllm/attention/backends/hpu_attn.py
@@ -16,6 +16,7 @@
 from vllm.attention.ops.hpu_paged_attn import (HPUPagedAttention,
                                                HPUPagedAttentionMetadata)
 from vllm.logger import init_logger
+from vllm.utils import is_fake_hpu
 
 logger = init_logger(__name__)
 
@@ -120,9 +121,10 @@ def __init__(
         assert self.num_heads % self.num_kv_heads == 0
         self.num_queries_per_kv = self.num_heads // self.num_kv_heads
 
-        self.prefill_usefusedsdpa = os.getenv('VLLM_PROMPT_USE_FUSEDSDPA',
-                                              '0').lower() in ['1', 'true']
-        if self.prefill_usefusedsdpa:
+        self.prefill_use_fusedsdpa = os.getenv('VLLM_PROMPT_USE_FUSEDSDPA',
+                                               '1').lower() in ['1', 'true'] \
+                                               and not is_fake_hpu()
+        if self.prefill_use_fusedsdpa:
             assert alibi_slopes is None, \
                 'Prefill with FusedSDPA not supported with alibi slopes!'
 
@@ -188,7 +190,7 @@ def forward(
             kv_shape = (batch_size, seq_len_kv, self.num_kv_heads,
                         self.head_size)
             if attn_metadata is None or attn_metadata.block_list is None:
-                if not self.prefill_usefusedsdpa:
+                if not self.prefill_use_fusedsdpa:
                     # TODO: move this outside of model
                     assert attn_metadata.attn_bias is not None, \
                             'attn_bias must be set before calling model.forward'