Skip to content

Commit

Permalink
Fix default value for FusedSDPA (FSDPA) (#448)
Browse files: browse the repository at this point in the history
  • Loading branch information
madamczykhabana authored Oct 30, 2024
1 parent 2f7f963 commit 94858b5
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions vllm/attention/backends/hpu_attn.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from vllm.attention.ops.hpu_paged_attn import (HPUPagedAttention,
HPUPagedAttentionMetadata)
from vllm.logger import init_logger
+ from vllm.utils import is_fake_hpu

logger = init_logger(__name__)

Expand Down Expand Up @@ -120,9 +121,10 @@ def __init__(
assert self.num_heads % self.num_kv_heads == 0
self.num_queries_per_kv = self.num_heads // self.num_kv_heads

- self.prefill_usefusedsdpa = os.getenv('VLLM_PROMPT_USE_FUSEDSDPA',
- '0').lower() in ['1', 'true']
- if self.prefill_usefusedsdpa:
+ self.prefill_use_fusedsdpa = os.getenv('VLLM_PROMPT_USE_FUSEDSDPA',
+ '1').lower() in ['1', 'true'] \
+ and not is_fake_hpu()
+ if self.prefill_use_fusedsdpa:
assert alibi_slopes is None, \
'Prefill with FusedSDPA not supported with alibi slopes!'

Expand Down Expand Up @@ -188,7 +190,7 @@ def forward(
kv_shape = (batch_size, seq_len_kv, self.num_kv_heads,
self.head_size)
if attn_metadata is None or attn_metadata.block_list is None:
- if not self.prefill_usefusedsdpa:
+ if not self.prefill_use_fusedsdpa:
# TODO: move this outside of model
assert attn_metadata.attn_bias is not None, \
'attn_bias must be set before calling model.forward'
Expand Down

0 comments on commit 94858b5

Please sign in to comment.