
Commit

fix: incorrect bigcode attention heads num (#676)
HermitSun authored Aug 4, 2023
1 parent aa84c92 commit 621980b
Showing 1 changed file with 5 additions and 3 deletions.
vllm/config.py: 5 additions & 3 deletions
@@ -98,9 +98,11 @@ def get_num_heads(self, parallel_config: "ParallelConfig") -> int:
         # Note: for falcon, when new_decoder_architecture is True, the
         # multi_query flag is ignored and we use n_head_kv for the number of
         # KV heads.
-        if (getattr(self.hf_config, "multi_query", False) and
-                (self.hf_config.model_type == "falcon" and
-                 not getattr(self.hf_config, "new_decoder_architecture", False))):
+        new_decoder_arch_falcon = (
+            self.hf_config.model_type == "falcon"
+            and getattr(self.hf_config, "new_decoder_architecture", False))
+        if not new_decoder_arch_falcon and getattr(self.hf_config,
+                                                   "multi_query", False):
             # Multi-query attention, only one KV head.
             return 1
         # For Falcon:
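
In effect, the corrected check returns a single KV head for any model whose HF config sets multi_query (such as GPT-BigCode, whose total head count was previously reported incorrectly), and only exempts Falcon checkpoints that use new_decoder_architecture, which keep n_head_kv KV heads. The sketch below restates that logic as a standalone helper; the get_num_kv_heads name, the SimpleNamespace configs, and the fallback branches are illustrative assumptions, not the actual ModelConfig.get_num_heads in vllm/config.py (which also takes a ParallelConfig).

from types import SimpleNamespace


def get_num_kv_heads(hf_config) -> int:
    # Illustrative stand-in for ModelConfig.get_num_heads, ignoring tensor
    # parallelism; the fallback attributes below are assumptions for this sketch.
    new_decoder_arch_falcon = (
        getattr(hf_config, "model_type", None) == "falcon"
        and getattr(hf_config, "new_decoder_architecture", False))
    if not new_decoder_arch_falcon and getattr(hf_config, "multi_query", False):
        # Multi-query attention: all query heads share a single KV head.
        return 1
    if getattr(hf_config, "n_head_kv", None) is not None:
        # Falcon with the new decoder architecture keeps n_head_kv KV heads.
        return hf_config.n_head_kv
    # Otherwise fall back to ordinary multi-head attention.
    return hf_config.num_attention_heads


# GPT-BigCode sets multi_query=True but is not Falcon; before this fix the
# extra model_type == "falcon" condition skipped the early return, so the
# model was treated as having one KV head per attention head.
bigcode = SimpleNamespace(model_type="gpt_bigcode", multi_query=True,
                          num_attention_heads=48)
falcon_40b = SimpleNamespace(model_type="falcon", multi_query=True,
                             new_decoder_architecture=True, n_head_kv=8,
                             num_attention_heads=128)
assert get_num_kv_heads(bigcode) == 1
assert get_num_kv_heads(falcon_40b) == 8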
