Skip to content

Commit

Permalink
Disable CPU tests for danube3-4b
Browse files Browse the repository at this point in the history
  • Loading branch information
g-eoj committed Jul 22, 2024
1 parent 526c675 commit 662c925
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .buildkite/run-cpu-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ docker exec cpu-test-avx2 bash -c "python3 examples/offline_inference.py"
docker exec cpu-test bash -c "cd tests;
pip install pytest Pillow protobuf
cd ../
pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py" # Mamba on CPU is not supported
pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba on CPU is not supported
1 change: 0 additions & 1 deletion tests/models/test_big_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
"EleutherAI/gpt-j-6b",
# "mosaicml/mpt-7b", # Broken
# "Qwen/Qwen1.5-0.5B" # Broken,
"h2oai/h2o-danube3-4b-base",
]

#TODO: remove this after CPU float16 support is ready
Expand Down
52 changes: 52 additions & 0 deletions tests/models/test_danube3_4b.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Compare the outputs of HF and vLLM when using greedy sampling.
This tests danube3 separately because its head size isn't supported on CPU yet.
Run `pytest tests/models/test_danube3_4b.py`.
"""
import pytest

from .utils import check_outputs_equal

# Only model exercised here; split out of test_big_models.py so the CPU
# pipeline can --ignore this file (head size unsupported on CPU).
MODELS = ["h2oai/h2o-danube3-4b-base"]

# dtype used for both the HF reference and the vLLM run.
target_dtype = "bfloat16"


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_tokens", [32])
def test_models(
    hf_runner,
    vllm_runner,
    example_prompts,
    model: str,
    dtype: str,
    max_tokens: int,
) -> None:
    """Greedy-decode the same prompts with HF and vLLM and require equal outputs."""
    # Run the HuggingFace reference first and let its context manager tear it
    # down before loading vLLM, so only one copy of the model is resident.
    with hf_runner(model, dtype=dtype) as hf_model:
        expected = hf_model.generate_greedy(example_prompts, max_tokens)

    with vllm_runner(model, dtype=dtype) as vllm_model:
        actual = vllm_model.generate_greedy(example_prompts, max_tokens)

    check_outputs_equal(
        outputs_0_lst=expected,
        outputs_1_lst=actual,
        name_0="hf",
        name_1="vllm",
    )


@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", [target_dtype])
def test_model_print(
    vllm_runner,
    model: str,
    dtype: str,
) -> None:
    """Smoke-test that the loaded model can be printed without raising."""
    with vllm_runner(model, dtype=dtype) as vllm_model:
        # Break the long attribute chain into named steps for readability.
        engine = vllm_model.model.llm_engine
        driver_worker = engine.model_executor.driver_worker
        # This test is for verifying whether the model's extra_repr
        # can be printed correctly.
        print(driver_worker.model_runner.model)

0 comments on commit 662c925

Please sign in to comment.