From 305b79cdf31d2f5ccc80aba00e28534babf35584 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 27 Mar 2024 04:39:55 +0000 Subject: [PATCH 1/2] [Misc] Minor fix in KVCache type --- docs/source/models/adding_model.rst | 4 ++-- vllm/model_executor/models/llava.py | 4 +--- vllm/worker/neuron_model_runner.py | 2 -- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/docs/source/models/adding_model.rst b/docs/source/models/adding_model.rst index bf243a044769f..45ef0340aae25 100644 --- a/docs/source/models/adding_model.rst +++ b/docs/source/models/adding_model.rst @@ -56,8 +56,8 @@ Next, you need to rewrite the :code:`forward` methods of your model by following - return_dict: Optional[bool] = None, -) -> Union[Tuple, CausalLMOutputWithPast]: + positions: torch.Tensor, - + kv_caches: List[KVCache], - + input_metadata: InputMetadata, + + kv_caches: List[torch.Tensor], + + attn_metadata: AttentionMetadata, +) -> Optional[SamplerOutput]: 1. Update the code by considering that :code:`input_ids` and :code:`positions` are now flattened tensors. diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index c824efdf04684..94fa989e86912 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -19,8 +19,6 @@ hf_model_weights_iterator) from vllm.sequence import SamplerOutput -KVCache = Tuple[torch.Tensor, torch.Tensor] - _KEYS_TO_MODIFY_MAPPING = { "language_model.lm_head": "lm_head", "language_model.model": "language_model", @@ -102,7 +100,7 @@ def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, - kv_caches: List[KVCache], + kv_caches: List[torch.Tensor], attn_metadata: AttentionMetadata, image_input: Optional[torch.Tensor] = None ) -> SamplerOutput: # noqa: E501 diff --git a/vllm/worker/neuron_model_runner.py b/vllm/worker/neuron_model_runner.py index ded22b9a3ac0f..fff721a80c204 100644 --- a/vllm/worker/neuron_model_runner.py +++ b/vllm/worker/neuron_model_runner.py @@ -14,8 +14,6 @@ logger = init_logger(__name__) -KVCache = Tuple[torch.Tensor, torch.Tensor] - class NeuronModelRunner: From bc543d3de6b30bfab51b4d5f5711dcc16b42f6bd Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 27 Mar 2024 04:49:47 +0000 Subject: [PATCH 2/2] yapf --- vllm/model_executor/models/llava.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index 94fa989e86912..c2571d0893c8d 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple +from typing import List, Optional import torch from torch import nn