diff --git a/vllm/model_executor/models/aquila.py b/vllm/model_executor/models/aquila.py
index 3fece0e2dd784..4dae9e46dad7d 100644
--- a/vllm/model_executor/models/aquila.py
+++ b/vllm/model_executor/models/aquila.py
@@ -25,7 +25,7 @@
 The input of the model is flattened to a 1D tensor of tokens. The model uses
 InputMetadata to extract the original 2D shape of the input.
 """
-from typing import List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 
 import torch
 from torch import nn
@@ -110,6 +110,7 @@ def __init__(
         num_kv_heads: int,
         rope_theta: float = 10000,
         max_position_embeddings: int = 8192,
+        rope_scaling: Optional[Dict[str, Any]] = None,
     ):
         super().__init__()
         self.hidden_size = hidden_size
@@ -148,6 +149,7 @@ def __init__(
             base=self.rope_theta,
             max_position=self.max_position_embeddings,
             num_kv_heads=self.num_kv_heads,
+            rope_scaling=rope_scaling,
         )
 
     def forward(
@@ -173,6 +175,7 @@ def __init__(self, config: AquilaConfig):
         super().__init__()
         self.hidden_size = config.hidden_size
         rope_theta = getattr(config, "rope_theta", 10000)
+        rope_scaling = getattr(config, "rope_scaling", None)
         max_position_embeddings = getattr(config, "max_position_embeddings",
                                           8192)
         self.self_attn = AquilaAttention(
@@ -181,6 +184,7 @@ def __init__(self, config: AquilaConfig):
             num_kv_heads=config.num_key_value_heads,
             rope_theta=rope_theta,
             max_position_embeddings=max_position_embeddings,
+            rope_scaling=rope_scaling,
         )
         self.mlp = AquilaMLP(
             hidden_size=self.hidden_size,
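
For context, a minimal sketch (not part of this diff) of how a HuggingFace-style `rope_scaling` entry would flow through the new plumbing. `FakeAquilaConfig` is a hypothetical stand-in for the real `AquilaConfig`; the dict shape (`{"type": ..., "factor": ...}`) follows the usual HF convention, and the attention layer is assumed to forward it unchanged to its rotary-embedding backend.

```python
# Illustration only: FakeAquilaConfig is a hypothetical stand-in for the
# real AquilaConfig from transformers; it is not part of this patch.
from typing import Any, Dict, Optional


class FakeAquilaConfig:
    rope_theta: float = 10000.0
    max_position_embeddings: int = 8192
    # HF-style entry, e.g. linear (position-interpolation) scaling by 2x.
    rope_scaling: Optional[Dict[str, Any]] = {"type": "linear", "factor": 2.0}


config = FakeAquilaConfig()

# Mirrors the new AquilaDecoderLayer.__init__ logic: checkpoints without a
# rope_scaling attribute fall back to None, i.e. no scaling is applied.
rope_scaling = getattr(config, "rope_scaling", None)
print(rope_scaling)  # {'type': 'linear', 'factor': 2.0}
```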