Commit
[CI][SpecDecode] Fix spec decode tests, use flash attention backend for spec decode CI tests. (vllm-project#8975)
LiuXiaoxuanPKU authored Oct 1, 2024
1 parent 1425a1b commit bce3244
Showing 2 changed files with 4 additions and 3 deletions.
2 changes: 0 additions & 2 deletions .buildkite/test-pipeline.yaml
@@ -207,8 +207,6 @@ steps:
   - vllm/spec_decode
   - tests/spec_decode
   commands:
-  # See https://github.com/vllm-project/vllm/issues/5152
-  - export VLLM_ATTENTION_BACKEND=XFORMERS
   - pytest -v -s spec_decode/e2e/test_multistep_correctness.py
   - pytest -v -s spec_decode --ignore=spec_decode/e2e/test_multistep_correctness.py

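The deleted lines stop forcing the xformers backend in CI; with the export gone, vLLM falls back to its default backend selection, reading `VLLM_ATTENTION_BACKEND` from the environment only when it is set. A minimal sketch of that env-driven override pattern (the env var name comes from the diff; the function name, default, and backend registry here are hypothetical, not vLLM's real selector):

```python
import os

# Toy registry; vLLM's real set of backends is larger.
KNOWN_BACKENDS = {"FLASH_ATTN", "XFORMERS"}


def select_attention_backend(default: str = "FLASH_ATTN") -> str:
    """Pick an attention backend, honoring the VLLM_ATTENTION_BACKEND
    override that the CI test previously exported."""
    override = os.environ.get("VLLM_ATTENTION_BACKEND")
    if override is None:
        return default
    if override not in KNOWN_BACKENDS:
        raise ValueError(f"Unknown attention backend: {override}")
    return override


# With no override set, the default wins.
os.environ.pop("VLLM_ATTENTION_BACKEND", None)
print(select_attention_backend())  # FLASH_ATTN
```

To reproduce the old CI behavior locally, one would export `VLLM_ATTENTION_BACKEND=XFORMERS` before running the tests.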
5 changes: 4 additions & 1 deletion tests/spec_decode/test_multi_step_worker.py
@@ -673,7 +673,10 @@ def test_use_draft_model_runner_advance_step():
     worker.model_runner._gpu_advance_step.side_effect = ValueError(
         exception_secret)

-    seq_group_metadata_list, _, _ = create_batch(batch_size, k)
+    seq_group_metadata_list, _, _ = create_batch(batch_size,
+                                                 k,
+                                                 block_size=block_size,
+                                                 num_gpu_blocks=num_gpu_blocks)

# Fallback (should not call) when num_steps=1.
execute_model_req = ExecuteModelRequest(
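The test change threads `block_size` and `num_gpu_blocks` into `create_batch`, so the fabricated batch is sized consistently with the worker's KV-cache configuration. A toy sketch of the capacity arithmetic those two parameters govern (the function and the even-split policy are illustrative only, not vLLM's actual block allocator):

```python
def max_tokens_per_sequence(num_gpu_blocks: int, block_size: int,
                            batch_size: int) -> int:
    """Tokens each sequence can hold if the GPU KV-cache blocks are
    divided evenly across the batch (toy model of paged attention)."""
    blocks_per_seq = num_gpu_blocks // batch_size
    return blocks_per_seq * block_size


# 1024 blocks of 16 tokens shared by 8 sequences -> 128 blocks each.
print(max_tokens_per_sequence(num_gpu_blocks=1024, block_size=16,
                              batch_size=8))  # 2048
```

If a test builds batches without respecting these limits, sequences can exceed what the cache can hold, which is the kind of mismatch passing the real values avoids.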
