diff --git a/tests/models/test_llava.py b/tests/models/test_llava.py
index 93634f245cee7..9d7da5f803ea4 100644
--- a/tests/models/test_llava.py
+++ b/tests/models/test_llava.py
@@ -179,3 +179,20 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
         num_logprobs=num_logprobs,
         tensor_parallel_size=1,
     )
+
+
+@pytest.mark.parametrize("model", models)
+def test_context_length_too_short(vllm_runner, image_assets, model):
+    images = [asset.pil_image for asset in image_assets]
+
+    with pytest.raises(ValueError, match="too long to fit into the model"):
+        vllm_model = vllm_runner(
+            model,
+            max_model_len=128,  # LLaVA has a feature size of 576
+            enforce_eager=True,
+        )
+
+        with vllm_model:
+            vllm_model.generate_greedy([HF_IMAGE_PROMPTS[0]],
+                                       max_tokens=1,
+                                       images=[images[0]])
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 62954a505a6a7..98994c586625b 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -2054,7 +2054,22 @@ def is_embedding_model(self):
 
     def _validate_model_inputs(self, inputs: Union[LLMInputs,
                                                    EncoderDecoderLLMInputs]):
-        prompt_key = "encoder_prompt_token_ids" \
-            if self.is_encoder_decoder_model() else "prompt_token_ids"
-        if not inputs.get(prompt_key):
+        if self.is_encoder_decoder_model():
+            prompt_ids = inputs.get("encoder_prompt_token_ids")
+        else:
+            prompt_ids = inputs.get("prompt_token_ids")
+
+        if prompt_ids is None or len(prompt_ids) == 0:
             raise ValueError("Prompt cannot be empty")
+
+        if self.model_config.multimodal_config is not None:
+            max_prompt_len = self.model_config.max_model_len
+
+            if len(prompt_ids) > max_prompt_len:
+                raise ValueError(
+                    f"The prompt (total length {len(prompt_ids)}) is too long "
+                    f"to fit into the model (context length {max_prompt_len}). "
+                    "Make sure that `max_model_len` is no smaller than the "
+                    "number of text tokens plus multimodal tokens. For image "
+                    "inputs, the number of image tokens depends on the number "
+                    "of images, and possibly their aspect ratios as well.")
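
For context, a minimal sketch of how the new validation would surface through the public `LLM` API rather than the test runner. The checkpoint name, prompt template, and image path below are assumptions for illustration (mirroring vLLM's LLaVA examples), not part of this diff:

    from PIL import Image

    from vllm import LLM

    # Assumed LLaVA-style checkpoint; max_model_len is deliberately smaller
    # than the 576 image feature tokens the placeholder expands to.
    llm = LLM(
        model="llava-hf/llava-1.5-7b-hf",
        max_model_len=128,
        enforce_eager=True,
    )

    image = Image.open("example.jpg")  # placeholder path (assumption)

    try:
        llm.generate({
            "prompt": "USER: <image>\nWhat is in this picture? ASSISTANT:",
            "multi_modal_data": {"image": image},
        })
    except ValueError as err:
        # Expected to match: "... too long to fit into the model (context length 128) ..."
        print(err)

Because the image placeholder is expanded to 576 feature tokens during input processing, the validated prompt length exceeds `max_model_len=128`, so `_validate_model_inputs` raises before any forward pass is scheduled.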