Add task to vlm test info for phi3v
alex-jw-brooks committed Oct 19, 2024
1 parent 6ab62f1 commit 7b4c97f
Showing 4 changed files with 10 additions and 5 deletions.
1 change: 1 addition & 0 deletions tests/models/decoder_only/vision_language/test_models.py
@@ -234,6 +234,7 @@
         img_idx_to_prompt=lambda idx: f"<|image_{idx}|>\n",
         max_model_len=4096,
         max_num_seqs=2,
+        task="generate",
         # use eager mode for hf runner, since phi3v didn't work with flash_attn
         model_kwargs={"_attn_implementation": "eager"},
         use_tokenizer_eos=True,
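
For context, this entry previously fell back to the `task: str = "auto"` default added in types.py below; pinning `task="generate"` makes the test load phi3v explicitly as a generation model. Below is a minimal sketch of what that pin corresponds to at the vLLM API level, assuming the test runner ultimately forwards the keyword to the `LLM` constructor's `task` argument; the constructor call itself is illustrative and not part of this commit.

```python
# Illustrative sketch only (not part of this commit): what pinning the task to
# "generate" amounts to once the runner eventually builds a vLLM engine.
from vllm import LLM

llm = LLM(
    model="microsoft/Phi-3-vision-128k-instruct",  # the phi3v checkpoint under test
    task="generate",           # explicit, instead of letting "auto" infer the task
    max_model_len=4096,
    max_num_seqs=2,
    enforce_eager=True,
    trust_remote_code=True,    # phi3v's HF repo ships custom modeling code
)
```
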
8 changes: 4 additions & 4 deletions
@@ -3,17 +3,17 @@
 handling multimodal placeholder substitution, and so on.
 """
 import itertools
-from typing import Dict, Iterable, Tuple
+from typing import Dict, Iterable, List, Tuple
 
 import pytest
 
 from .types import (EMBEDDING_SIZE_FACTORS, ImageSizeWrapper, SizeType,
                     VLMTestInfo, VLMTestType)
 
 
-def get_filtered_test_settings(test_settings: Dict[str, VLMTestInfo],
-                               test_type: VLMTestType,
-                               fork_per_test: bool) -> Dict[str, VLMTestInfo]:
+def get_filtered_test_settings(
+        test_settings: Dict[str, VLMTestInfo], test_type: VLMTestType,
+        fork_per_test: bool) -> Tuple[Dict[str, VLMTestInfo], List[str]]:
     """Given the dict of potential test settings to run, return a subdict
     of tests who have the current test type enabled, with the matching val for
     fork_per_test, as well as a list of the all tests that were enabled, but
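
The signature change widens the return type from the filtered dict alone to a tuple of that dict plus the names of tests that matched but were filtered out. A self-contained sketch of the same pattern with toy types, to make the new two-part return concrete; the names here are stand-ins, not the real settings.

```python
from typing import Dict, List, NamedTuple, Tuple

class ToySetting(NamedTuple):
    """Stand-in for a VLMTestInfo-like entry; only what the sketch needs."""
    enabled: bool
    fork_per_test: bool

def filter_settings(settings: Dict[str, ToySetting],
                    fork_per_test: bool) -> Tuple[Dict[str, ToySetting], List[str]]:
    """Return (matching settings, names that were enabled but filtered out),
    mirroring the widened Tuple[Dict[...], List[str]] return type above."""
    matching = {name: s for name, s in settings.items()
                if s.enabled and s.fork_per_test == fork_per_test}
    skipped = [name for name, s in settings.items()
               if s.enabled and name not in matching]
    return matching, skipped

matching, skipped = filter_settings(
    {"phi3v": ToySetting(enabled=True, fork_per_test=False),
     "other": ToySetting(enabled=True, fork_per_test=True)},
    fork_per_test=False,
)
print(sorted(matching), skipped)  # ['phi3v'] ['other']
```
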
4 changes: 3 additions & 1 deletion tests/models/decoder_only/vision_language/vlm_utils/core.py
@@ -32,6 +32,7 @@ def run_test(
     limit_mm_per_prompt: Dict[str, int],
     model_kwargs: Optional[Dict[str, Any]],
     patch_hf_runner: Optional[Callable[[HfRunner], HfRunner]],
+    task: str = "auto",
     runner_mm_key: str = "images",
     distributed_executor_backend: Optional[str] = None,
     tensor_parallel_size: int = 1,
@@ -60,7 +61,8 @@ def run_test(
                      limit_mm_per_prompt=limit_mm_per_prompt,
                      tensor_parallel_size=tensor_parallel_size,
                      distributed_executor_backend=distributed_executor_backend,
-                     enforce_eager=enforce_eager) as vllm_model:
+                     enforce_eager=enforce_eager,
+                     task=task) as vllm_model:
         for prompts, media in vllm_inputs:
             vllm_kwargs[runner_mm_key] = media
             vllm_output = vllm_model.generate_greedy_logprobs(
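
run_test now accepts a task keyword (defaulting to "auto") and forwards it into the vLLM runner alongside the other engine options. A small self-contained sketch of that plumbing pattern; the runner and class names below are toy stand-ins, not the test harness's real fixtures.

```python
from contextlib import contextmanager
from typing import Iterator

class ToyRunner:
    """Stand-in for the vLLM runner the test harness opens as a context manager."""
    def __init__(self, model: str, task: str = "auto", enforce_eager: bool = True):
        self.model, self.task, self.enforce_eager = model, task, enforce_eager

@contextmanager
def toy_vllm_runner(model: str, **kwargs) -> Iterator[ToyRunner]:
    # keyword args, including the new task=..., pass straight through to the runner
    yield ToyRunner(model, **kwargs)

def toy_run_test(model: str, task: str = "auto") -> str:
    # mirrors the hunk above: the test's task setting rides along with enforce_eager
    with toy_vllm_runner(model, enforce_eager=True, task=task) as vllm_model:
        return vllm_model.task

print(toy_run_test("phi3v", task="generate"))  # -> generate
```
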
2 changes: 2 additions & 0 deletions tests/models/decoder_only/vision_language/vlm_utils/types.py
@@ -87,6 +87,7 @@ class VLMTestInfo(NamedTuple):
     enforce_eager: bool = True
     max_model_len: int = 1024
     max_num_seqs: int = 256
+    task: str = "auto"
     tensor_parallel_size: int = 1
 
     # Optional callable which gets a list of token IDs from the model tokenizer
@@ -152,6 +153,7 @@ def get_non_parametrized_runner_kwargs(self):
             "enforce_eager": self.enforce_eager,
             "max_model_len": self.max_model_len,
             "max_num_seqs": self.max_num_seqs,
+            "task": self.task,
             "hf_output_post_proc": self.hf_output_post_proc,
             "vllm_output_post_proc": self.vllm_output_post_proc,
             "auto_cls": self.auto_cls,
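
With the new VLMTestInfo field and the extra key in get_non_parametrized_runner_kwargs, a per-model task setting flows from the test table into run_test without any other call sites changing. A toy sketch of that flow using stand-in names; only the field and key names shown in the hunks above are taken from the commit.

```python
from typing import Any, Dict, NamedTuple

class ToyTestInfo(NamedTuple):
    """Stand-in for VLMTestInfo; only the fields needed to show the new plumbing."""
    max_model_len: int = 1024
    max_num_seqs: int = 256
    task: str = "auto"            # new field; the phi3v entry overrides it with "generate"

    def runner_kwargs(self) -> Dict[str, Any]:
        # mirrors get_non_parametrized_runner_kwargs: per-test settings become
        # keyword arguments for run_test, now including "task"
        return {
            "max_model_len": self.max_model_len,
            "max_num_seqs": self.max_num_seqs,
            "task": self.task,
        }

phi3v_like = ToyTestInfo(max_model_len=4096, max_num_seqs=2, task="generate")
print(phi3v_like.runner_kwargs())
# {'max_model_len': 4096, 'max_num_seqs': 2, 'task': 'generate'}
```
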
