From 7d4ae0bbbaca34476f3ebb6b0c4e2b8a19367aec Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Mon, 1 Jul 2024 23:17:15 +0000
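Subject: [PATCH 01/20] buildkite: add test-template.j2 pipeline template

Add a Jinja template that renders the Buildkite pipeline: a Docker image
build-and-push step, fixed Neuron / Intel CPU / Intel GPU steps, one
docker-plugin step per non-A100 entry in `steps`, a manually unblocked
set of A100 steps run through the kubernetes plugin, and an "AMD Tests"
group for entries whose mirror_hardwares contains "amd".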

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-template.j2 | 173 ++++++++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)
 create mode 100644 .buildkite/test-template.j2

diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2
new file mode 100644
index 0000000000000..df4c34b0d729a
--- /dev/null
+++ b/.buildkite/test-template.j2
@@ -0,0 +1,173 @@
+{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}
+{% set default_working_dir = "/vllm-workspace/tests" %}
+{% set hf_home = "/root/.cache/huggingface" %}
+
+steps:
+  - label: ":docker: build image"
+    key: image-build
+    agents:
+      queue: cpu_queue
+    commands:
+      - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
+      - "docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --tag {{ docker_image }} --target test --progress plain ."
+      - "docker push {{ docker_image }}"
+    env:
+      DOCKER_BUILDKIT: "1"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 5
+        - exit_status: -10  # Agent was lost
+          limit: 5
+  - wait
+
+  - label: "Neuron Test"
+    depends_on: ~
+    agents:
+      queue: neuron
+    command: bash .buildkite/run-neuron-test.sh
+    soft_fail: false
+
+  - label: "Intel CPU Test"
+    depends_on: ~
+    agents:
+      queue: intel-cpu
+    command: bash .buildkite/run-cpu-test.sh
+
+  - label: "Intel GPU Test"
+    depends_on: ~
+    agents:
+      queue: intel-gpu
+    command: bash .buildkite/run-xpu-test.sh
+
+  {% for step in steps %}
+  {% if $BUILDKITE_PIPELINE_SLUG == "ci-aws" or ($BUILDKITE_PIPELINE_SLUG == "fastcheck" and {{ step.fast_check or false }}) %}
+  {% if step.gpu != "a100" %}
+  - label: "{{ step.label }}"
+    agents:
+      {% if step.label == "Documentation Build" %}
+      queue: small_cpu_queue
+      {% elif step.no_gpu %}
+      queue: cpu_queue
+      {% elif step.num_gpus == 2 or step.num_gpus == 4 %}
+      queue: gpu_4_queue
+      {% else %}
+      queue: gpu_1_queue
+      {% endif %}
+    soft_fail: {{ step.soft_fail or false }}
+    {% if step.parallelism %}
+    parallelism: {{ step.parallelism }}
+    {% endif %}
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 5
+        - exit_status: -10  # Agent was lost
+          limit: 5
+    plugins:
+      - docker#v5.2.0:
+          image: {{ docker_image }}
+          always-pull: true
+          propagate-environment: true
+          {% if not step.no_gpu %}
+          gpus: all
+          {% endif %}
+          {% if step.label == "Benchmarks" %}
+          mount-buildkite-agent: true
+          {% endif %}
+          command: ["bash", "-c", "cd {{ (step.working_dir or default_working_dir) | safe  }} && {{ step.command  or (step.commands | join(' && ')) | safe }}"]
+          environment:
+            - VLLM_USAGE_SOURCE=ci-test
+            - HF_HOME={{ hf_home }}
+            - HF_TOKEN
+            {% if step.label == "Speculative decoding tests" %}
+            - VLLM_ATTENTION_BACKEND=XFORMERS
+            {% endif %}
+          volumes:
+            - /dev/shm:/dev/shm
+            - {{ hf_home }}:{{ hf_home }}
+  {% endif %}
+  {% endif %}
+  {% endfor %}
+
+  - block: "Run A100 tests"
+    depends_on: image-build
+
+  {% for step in steps %}
+  {% if step.gpu == "a100" %}
+  - label: "{{ step.label }}"
+    priority: 10000
+    agents:
+      queue: a100-queue
+    soft_fail: {{ step.soft_fail or false }}
+    {% if step.parallelism %}
+    parallelism: {{ step.parallelism }}
+    {% endif %}
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 5
+        - exit_status: -10  # Agent was lost
+          limit: 5
+    plugins:
+    - kubernetes:
+        podSpec:
+          priorityClassName: ci
+          containers:
+          - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:f17f03744ebabed187634baec601ef35094ae14f
+            command: ["bash"]
+            args:
+            - '-c'
+            - "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'"
+            resources:
+              limits:
+                nvidia.com/gpu: {{ step.num_gpus or 1 }}
+            volumeMounts:
+            - name: devshm
+              mountPath: /dev/shm
+            - name: hf-cache
+              mountPath: {{ hf_home }}
+            env:
+            - name: VLLM_USAGE_SOURCE
+              value: ci-test
+            - name: HF_HOME
+              value: {{ hf_home }}
+            - name: HF_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: hf-token-secret
+                  key: token
+          nodeSelector:
+            nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB
+          volumes:
+          - name: devshm
+            emptyDir:
+              medium: Memory
+          - name: hf-cache
+            hostPath:
+              path: {{ hf_home }}
+              type: Directory
+  
+  {% if $BUILDKITE_PIPELINE_SLUG == "fastcheck" %}
+  - block: "Run AMD tests"
+    depends_on: ~
+  {% endif %}
+
+  - group: "AMD Tests"
+    {% if $BUILDKITE_PIPELINE_SLUG != "fastcheck" %}depends_on: ~{% endif %}
+    steps:
+    {% for step in steps %}
+    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
+      - label: "AMD: {{ step.label }}"
+        agents:
+          queue: amd
+        command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe  }} ; {{ step.command  or (step.commands | join(" && ")) | safe }}"
+        env:
+          DOCKER_BUILDKIT: "1"
+        priority: 100
+        soft_fail: true
+    {% endif %}
+    {% endfor %}
+
+  {% endif %}
+  {% endfor %}
\ No newline at end of file

From fcf34e6cc0b5d0850c906f1d428595c9751159be Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Mon, 1 Jul 2024 23:18:14 +0000
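Subject: [PATCH 02/20] buildkite: mark two test steps with fastcheck: true

Add `fastcheck: true` to the "Regression Test" and "AsyncEngine Test"
steps in test-pipeline.yaml.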

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-pipeline.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index c51702886f394..2e0867b6fd482 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -8,6 +8,7 @@
 
 steps:
 - label: Regression Test
+  fastcheck: true
   mirror_hardwares: [amd]
   command: pytest -v -s test_regression.py
   working_dir: "/vllm-workspace/tests" # optional
@@ -15,6 +16,7 @@ steps:
 - label: AsyncEngine Test
   #mirror_hardwares: [amd]
   command: pytest -v -s async_engine
+  fastcheck: true
 
 - label: Basic Correctness Test
   mirror_hardwares: [amd]

From 0c6d7b3850f1c5721aea68f0fc2a05e7c4d31b05 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Mon, 1 Jul 2024 23:23:28 +0000
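Subject: [PATCH 03/20] buildkite: read pipeline slug from env in test-template.j2

Replace the shell-style $BUILDKITE_PIPELINE_SLUG references inside the
Jinja conditions with env.BUILDKITE_PIPELINE_SLUG, read step.fast_check
through the default(false) filter, and move the AMD group's conditional
depends_on line below its steps.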

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-template.j2 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2
index df4c34b0d729a..c322d75db3411 100644
--- a/.buildkite/test-template.j2
+++ b/.buildkite/test-template.j2
@@ -41,7 +41,7 @@ steps:
     command: bash .buildkite/run-xpu-test.sh
 
   {% for step in steps %}
-  {% if $BUILDKITE_PIPELINE_SLUG == "ci-aws" or ($BUILDKITE_PIPELINE_SLUG == "fastcheck" and {{ step.fast_check or false }}) %}
+  {% if env.BUILDKITE_PIPELINE_SLUG == "ci-aws" or (env.BUILDKITE_PIPELINE_SLUG == "fastcheck" and step.fast_check|default(false)) %}
   {% if step.gpu != "a100" %}
   - label: "{{ step.label }}"
     agents:
@@ -148,13 +148,12 @@ steps:
               path: {{ hf_home }}
               type: Directory
   
-  {% if $BUILDKITE_PIPELINE_SLUG == "fastcheck" %}
+  {% if env.BUILDKITE_PIPELINE_SLUG == "fastcheck" %}
   - block: "Run AMD tests"
     depends_on: ~
   {% endif %}
 
   - group: "AMD Tests"
-    {% if $BUILDKITE_PIPELINE_SLUG != "fastcheck" %}depends_on: ~{% endif %}
     steps:
     {% for step in steps %}
     {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
@@ -168,6 +167,7 @@ steps:
         soft_fail: true
     {% endif %}
     {% endfor %}
+    {% if env.BUILDKITE_PIPELINE_SLUG != "fastcheck" %}depends_on: ~{% endif %}
 
   {% endif %}
   {% endfor %}
\ No newline at end of file

From 3d57156a9251935aebe61404a7f87593c32eee06 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Mon, 1 Jul 2024 23:29:28 +0000
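Subject: [PATCH 04/20] buildkite: index env by key for the pipeline slug check

Switch the three template conditions from env.BUILDKITE_PIPELINE_SLUG
to env["BUILDKITE_PIPELINE_SLUG"].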

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-template.j2 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2
index c322d75db3411..091a4feb61901 100644
--- a/.buildkite/test-template.j2
+++ b/.buildkite/test-template.j2
@@ -41,7 +41,7 @@ steps:
     command: bash .buildkite/run-xpu-test.sh
 
   {% for step in steps %}
-  {% if env.BUILDKITE_PIPELINE_SLUG == "ci-aws" or (env.BUILDKITE_PIPELINE_SLUG == "fastcheck" and step.fast_check|default(false)) %}
+  {% if env["BUILDKITE_PIPELINE_SLUG"] == "ci-aws" or (env["BUILDKITE_PIPELINE_SLUG"] == "fastcheck" and step.fast_check|default(false)) %}
   {% if step.gpu != "a100" %}
   - label: "{{ step.label }}"
     agents:
@@ -148,7 +148,7 @@ steps:
               path: {{ hf_home }}
               type: Directory
   
-  {% if env.BUILDKITE_PIPELINE_SLUG == "fastcheck" %}
+  {% if env["BUILDKITE_PIPELINE_SLUG"] == "fastcheck" %}
   - block: "Run AMD tests"
     depends_on: ~
   {% endif %}
@@ -167,7 +167,7 @@ steps:
         soft_fail: true
     {% endif %}
     {% endfor %}
-    {% if env.BUILDKITE_PIPELINE_SLUG != "fastcheck" %}depends_on: ~{% endif %}
+    {% if env["BUILDKITE_PIPELINE_SLUG"] != "fastcheck" %}depends_on: ~{% endif %}
 
   {% endif %}
   {% endfor %}
\ No newline at end of file

From d6579e39b5303455d35263ee7df3aab313f9000d Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Mon, 1 Jul 2024 23:55:55 +0000
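Subject: [PATCH 05/20] buildkite: rename template to test-template-fastcheck.j2

Rename test-template.j2 to test-template-fastcheck.j2 and drop the
BUILDKITE_PIPELINE_SLUG conditions. Each non-A100 step whose fast_check
is not "true" is now preceded by a manual "Run <label>" block step, and
the AMD block and "AMD Tests" group move up to directly after the
`wait` step. The unmatched endif/endfor pair at the end of the file is
removed.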

Signed-off-by: kevin <kevin@anyscale.com>
---
 ...template.j2 => test-template-fastcheck.j2} | 50 +++++++++----------
 1 file changed, 25 insertions(+), 25 deletions(-)
 rename .buildkite/{test-template.j2 => test-template-fastcheck.j2} (94%)

diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template-fastcheck.j2
similarity index 94%
rename from .buildkite/test-template.j2
rename to .buildkite/test-template-fastcheck.j2
index 091a4feb61901..c02efe675f6af 100644
--- a/.buildkite/test-template.j2
+++ b/.buildkite/test-template-fastcheck.j2
@@ -21,6 +21,25 @@ steps:
           limit: 5
   - wait
 
+  - block: "Run AMD tests"
+    depends_on: ~
+
+  - group: "AMD Tests"
+    depends_on: ~
+    steps:
+    {% for step in steps %}
+    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
+      - label: "AMD: {{ step.label }}"
+        agents:
+          queue: amd
+        command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe  }} ; {{ step.command  or (step.commands | join(" && ")) | safe }}"
+        env:
+          DOCKER_BUILDKIT: "1"
+        priority: 100
+        soft_fail: true
+    {% endif %}
+    {% endfor %}
+
   - label: "Neuron Test"
     depends_on: ~
     agents:
@@ -41,8 +60,12 @@ steps:
     command: bash .buildkite/run-xpu-test.sh
 
   {% for step in steps %}
-  {% if env["BUILDKITE_PIPELINE_SLUG"] == "ci-aws" or (env["BUILDKITE_PIPELINE_SLUG"] == "fastcheck" and step.fast_check|default(false)) %}
   {% if step.gpu != "a100" %}
+  {% if step.fast_check != "true" %}
+  - block: "Run {{ step.label }}"
+    depends_on: image_build
+  {% endif %}
+
   - label: "{{ step.label }}"
     agents:
       {% if step.label == "Documentation Build" %}
@@ -87,7 +110,6 @@ steps:
             - /dev/shm:/dev/shm
             - {{ hf_home }}:{{ hf_home }}
   {% endif %}
-  {% endif %}
   {% endfor %}
 
   - block: "Run A100 tests"
@@ -147,27 +169,5 @@ steps:
             hostPath:
               path: {{ hf_home }}
               type: Directory
-  
-  {% if env["BUILDKITE_PIPELINE_SLUG"] == "fastcheck" %}
-  - block: "Run AMD tests"
-    depends_on: ~
   {% endif %}
-
-  - group: "AMD Tests"
-    steps:
-    {% for step in steps %}
-    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
-      - label: "AMD: {{ step.label }}"
-        agents:
-          queue: amd
-        command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe  }} ; {{ step.command  or (step.commands | join(" && ")) | safe }}"
-        env:
-          DOCKER_BUILDKIT: "1"
-        priority: 100
-        soft_fail: true
-    {% endif %}
-    {% endfor %}
-    {% if env["BUILDKITE_PIPELINE_SLUG"] != "fastcheck" %}depends_on: ~{% endif %}
-
-  {% endif %}
-  {% endfor %}
\ No newline at end of file
+  {% endfor %}

From fd54c9832c9b8b3636c8e4e6f989eaa9d5672be6 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Mon, 1 Jul 2024 23:59:27 +0000
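Subject: [PATCH 06/20] buildkite: rename fastcheck key to fast_check

Rename the `fastcheck` key on the "Regression Test" and "AsyncEngine
Test" steps to `fast_check`, the attribute name the template reads.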

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-pipeline.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 2e0867b6fd482..a34cc0717e16d 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -8,7 +8,7 @@
 
 steps:
 - label: Regression Test
-  fastcheck: true
+  fast_check: true
   mirror_hardwares: [amd]
   command: pytest -v -s test_regression.py
   working_dir: "/vllm-workspace/tests" # optional
@@ -16,7 +16,7 @@ steps:
 - label: AsyncEngine Test
   #mirror_hardwares: [amd]
   command: pytest -v -s async_engine
-  fastcheck: true
+  fast_check: true
 
 - label: Basic Correctness Test
   mirror_hardwares: [amd]

From fcdf09cdbeb4e654e0ae79bc50012904c1063c33 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Tue, 2 Jul 2024 00:05:39 +0000
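Subject: [PATCH 07/20] buildkite: compare step.fast_check to boolean true

test-pipeline.yaml sets fast_check to the YAML boolean `true`, so the
template condition now compares against `true` instead of the string
"true".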

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-template-fastcheck.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2
index c02efe675f6af..2862d518f449c 100644
--- a/.buildkite/test-template-fastcheck.j2
+++ b/.buildkite/test-template-fastcheck.j2
@@ -61,7 +61,7 @@ steps:
 
   {% for step in steps %}
   {% if step.gpu != "a100" %}
-  {% if step.fast_check != "true" %}
+  {% if step.fast_check != true %}
   - block: "Run {{ step.label }}"
     depends_on: image_build
   {% endif %}

From c8c9e2c2f98334d16628161d4b7e042385073570 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Tue, 2 Jul 2024 00:19:11 +0000
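Subject: [PATCH 08/20] buildkite: fold small test steps into fast_check groups

Merge individual steps in test-pipeline.yaml into grouped fast_check
steps:
  * "E2E tests": regression, basic correctness, entrypoints
  * "Unit tests": async engine, inputs, utils, worker
  * "Other small tests": tensorizer, metrics, tracing, docs build
Also mark "Core Test" and "Distributed Tests (4 GPUs)" as fast_check and
move the 4-GPU step ahead of the 2-GPU step. The original standalone
steps are removed.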

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-pipeline.yaml | 113 ++++++++++++++--------------------
 1 file changed, 45 insertions(+), 68 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index a34cc0717e16d..5a42de8658e76 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -7,32 +7,51 @@
 
 
 steps:
-- label: Regression Test
+- label: E2E tests
   fast_check: true
-  mirror_hardwares: [amd]
-  command: pytest -v -s test_regression.py
-  working_dir: "/vllm-workspace/tests" # optional
-
-- label: AsyncEngine Test
-  #mirror_hardwares: [amd]
-  command: pytest -v -s async_engine
-  fast_check: true
-
-- label: Basic Correctness Test
-  mirror_hardwares: [amd]
   commands:
-  - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py
+  - pytest -v -s test_regression.py # Regression
+  - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py # Basic Correctness
   - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py
   - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
   - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
   - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
+  - pytest -v -s entrypoints/llm # Entrypoints
+  - pytest -v -s entrypoints/openai
+
+- label: Unit tests
+  fast_check: true
+  commands:
+  - pytest -v -s async_engine # Async Engine
+  - bash ../.buildkite/download-images.sh # Inputs 
+  - pytest -v -s test_inputs.py
+  - pytest -v -s multimodal
+  - pytest -v -s test_utils.py # Utils
+  - pytest -v -s worker # Worker
+
 
 - label: Core Test
   mirror_hardwares: [amd]
+  fast_check: true
   commands: 
   - pytest -v -s core
   - pytest -v -s distributed/test_parallel_state.py
 
+- label: Other small tests
+  fast_check: true
+  commands:
+  - apt-get install curl libsodium23 && pytest -v -s tensorizer_loader # Tensorizer
+  - pytest -v -s metrics # Metrics
+  - "pip install \
+      opentelemetry-sdk \
+      opentelemetry-api \
+      opentelemetry-exporter-otlp \
+      opentelemetry-semantic-conventions-ai" # Tracings
+  - pytest -v -s tracing
+  - cd /vllm-workspace/test_docs/docs
+  - pip install -r requirements-docs.txt
+  - SPHINXOPTS=\"-W\" make html
+
 - label: Distributed Comm Ops Test
   #mirror_hardwares: [amd]
   working_dir: "/vllm-workspace/tests"
@@ -41,6 +60,19 @@ steps:
   - pytest -v -s distributed/test_comm_ops.py
   - pytest -v -s distributed/test_shm_broadcast.py
 
+- label: Distributed Tests (4 GPUs)
+  #mirror_hardwares: [amd]
+  working_dir: "/vllm-workspace/tests"
+  fast_check: true
+  num_gpus: 4
+  commands:
+  - pytest -v -s distributed/test_pynccl.py
+  # We want to test that models which use 2 GPUs work with 4 GPUs, which is why we duplicate them here.
+  # See https://github.com/vllm-project/vllm/pull/5473#issuecomment-2166601837 for context.
+  - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
+  - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py
+  - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py
+
 - label: Distributed Tests (2 GPUs)
   mirror_hardwares: [amd]
   working_dir: "/vllm-workspace/tests"
@@ -64,29 +96,10 @@ steps:
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py
 
-- label: Distributed Tests (4 GPUs)
-  #mirror_hardwares: [amd]
-  working_dir: "/vllm-workspace/tests"
-  num_gpus: 4
-  commands:
-  - pytest -v -s distributed/test_pynccl.py
-  # We want to test that models which use 2 GPUs work with 4 GPUs, which is why we duplicate them here.
-  # See https://github.com/vllm-project/vllm/pull/5473#issuecomment-2166601837 for context.
-  - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
-  - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py
-  - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py
-
 - label: Engine Test
   mirror_hardwares: [amd]
   command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
 
-- label: Entrypoints Test
-  mirror_hardwares: [amd]
-
-  commands:
-  - pytest -v -s entrypoints/llm
-  - pytest -v -s entrypoints/openai
-
 - label: Examples Test
   working_dir: "/vllm-workspace/examples"
   mirror_hardwares: [amd]
@@ -100,13 +113,6 @@ steps:
     - python3 llava_example.py
     - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
 
-- label: Inputs Test
-  #mirror_hardwares: [amd]
-  commands:
-    - bash ../.buildkite/download-images.sh
-    - pytest -v -s test_inputs.py
-    - pytest -v -s multimodal
-
 - label: Kernels Test %N
   #mirror_hardwares: [amd]
   command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
@@ -136,13 +142,6 @@ steps:
   mirror_hardwares: [amd]
   command: pytest -v -s test_logits_processor.py
 
-- label: Utils Test
-  command: pytest -v -s test_utils.py
-
-- label: Worker Test
-  mirror_hardwares: [amd]
-  command: pytest -v -s worker
-
 - label: Speculative decoding tests
   #mirror_hardwares: [amd]
   commands:
@@ -165,26 +164,11 @@ steps:
     - export VLLM_WORKER_MULTIPROC_METHOD=spawn
     - pytest -v -s -x lora/test_long_context.py
 
-- label: Tensorizer Test
-  #mirror_hardwares: [amd]
-  command: apt-get install curl libsodium23 && pytest -v -s tensorizer_loader
-
-- label: Metrics Test
-  mirror_hardwares: [amd]
-  command: pytest -v -s metrics
 
 - label: Quantization Test
   #mirror_hardwares: [amd]
   command: pytest -v -s quantization
 
-- label: Tracing Test
-  commands: 
-    - "pip install \
-        opentelemetry-sdk \
-        opentelemetry-api \
-        opentelemetry-exporter-otlp \
-        opentelemetry-semantic-conventions-ai"
-    - pytest -v -s tracing
 
 - label: Benchmarks
   working_dir: "/vllm-workspace/.buildkite"
@@ -200,13 +184,6 @@ steps:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - bash ./run-tests.sh -c configs/models-small.txt -t 1
 
-- label: Documentation Build
-  working_dir: "/vllm-workspace/test_docs/docs"
-  no_gpu: True
-  commands:
-  - pip install -r requirements-docs.txt
-  - SPHINXOPTS=\"-W\" make html
-
 - label: Distributed Tests (A100)
   gpu: a100
   num_gpus: 4

From 0a9bafad36c1c46d62515e5abfe6acde9707bf5f Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Tue, 2 Jul 2024 00:22:56 +0000
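Subject: [PATCH 09/20] buildkite: move AMD block and group to end of template

Move the "Run AMD tests" block step and the "AMD Tests" group from just
after the `wait` step to the end of test-template-fastcheck.j2. Their
content is unchanged.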

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-template-fastcheck.j2 | 38 +++++++++++++--------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2
index 2862d518f449c..a3e4cf6b00d4b 100644
--- a/.buildkite/test-template-fastcheck.j2
+++ b/.buildkite/test-template-fastcheck.j2
@@ -21,25 +21,6 @@ steps:
           limit: 5
   - wait
 
-  - block: "Run AMD tests"
-    depends_on: ~
-
-  - group: "AMD Tests"
-    depends_on: ~
-    steps:
-    {% for step in steps %}
-    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
-      - label: "AMD: {{ step.label }}"
-        agents:
-          queue: amd
-        command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe  }} ; {{ step.command  or (step.commands | join(" && ")) | safe }}"
-        env:
-          DOCKER_BUILDKIT: "1"
-        priority: 100
-        soft_fail: true
-    {% endif %}
-    {% endfor %}
-
   - label: "Neuron Test"
     depends_on: ~
     agents:
@@ -171,3 +152,22 @@ steps:
               type: Directory
   {% endif %}
   {% endfor %}
+
+  - block: "Run AMD tests"
+    depends_on: ~
+
+  - group: "AMD Tests"
+    depends_on: ~
+    steps:
+    {% for step in steps %}
+    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
+      - label: "AMD: {{ step.label }}"
+        agents:
+          queue: amd
+        command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe  }} ; {{ step.command  or (step.commands | join(" && ")) | safe }}"
+        env:
+          DOCKER_BUILDKIT: "1"
+        priority: 100
+        soft_fail: true
+    {% endif %}
+    {% endfor %}

From 7f38872652384aef13489f86950e66dae9e4b8aa Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Tue, 2 Jul 2024 00:47:54 +0000
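Subject: [PATCH 10/20] buildkite: pin fastcheck image tag and drop image build

Set docker_image to a fixed tag
(0a9bafad36c1c46d62515e5abfe6acde9707bf5f) instead of
$BUILDKITE_COMMIT, remove the ":docker: build image" step and the
following `wait`, and add `priority: 10000` to each templated test step.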

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-template-fastcheck.j2 | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2
index a3e4cf6b00d4b..fa25e3d4413ba 100644
--- a/.buildkite/test-template-fastcheck.j2
+++ b/.buildkite/test-template-fastcheck.j2
@@ -1,26 +1,8 @@
-{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}
+{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:0a9bafad36c1c46d62515e5abfe6acde9707bf5f" %}
 {% set default_working_dir = "/vllm-workspace/tests" %}
 {% set hf_home = "/root/.cache/huggingface" %}
 
 steps:
-  - label: ":docker: build image"
-    key: image-build
-    agents:
-      queue: cpu_queue
-    commands:
-      - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-      - "docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --tag {{ docker_image }} --target test --progress plain ."
-      - "docker push {{ docker_image }}"
-    env:
-      DOCKER_BUILDKIT: "1"
-    retry:
-      automatic:
-        - exit_status: -1  # Agent was lost
-          limit: 5
-        - exit_status: -10  # Agent was lost
-          limit: 5
-  - wait
-
   - label: "Neuron Test"
     depends_on: ~
     agents:
@@ -48,6 +30,7 @@ steps:
   {% endif %}
 
   - label: "{{ step.label }}"
+    priority: 10000
     agents:
       {% if step.label == "Documentation Build" %}
       queue: small_cpu_queue

From cea26d54371dfc39bb9da62d96442540e5b983a4 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Thu, 11 Jul 2024 23:20:44 +0000
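Subject: [PATCH 11/20] buildkite: relabel grouped steps, restore per-suite steps

In test-pipeline.yaml, give the grouped fast_check steps descriptive
labels and re-add the standalone per-suite steps next to them. Also add
"Pipeline Parallelism Test" and "LM Eval Large Models", run the
tokenization tests as a separate command in "Engine Test", and install
the flashinfer wheel before the kernels and models tests.
test-template-fastcheck.j2 is also modified by this patch.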

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-pipeline.yaml         | 130 ++++++++++++++++++++++----
 .buildkite/test-template-fastcheck.j2 |   3 +-
 2 files changed, 113 insertions(+), 20 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 5a42de8658e76..0f72748338735 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -7,7 +7,7 @@
 
 
 steps:
-- label: E2E tests
+- label: Regression, Basic Correctness, Entrypoints test
   fast_check: true
   commands:
   - pytest -v -s test_regression.py # Regression
@@ -19,7 +19,7 @@ steps:
   - pytest -v -s entrypoints/llm # Entrypoints
   - pytest -v -s entrypoints/openai
 
-- label: Unit tests
+- label: Async Engine, Inputs, Utils, Worker test
   fast_check: true
   commands:
   - pytest -v -s async_engine # Async Engine
@@ -37,7 +37,7 @@ steps:
   - pytest -v -s core
   - pytest -v -s distributed/test_parallel_state.py
 
-- label: Other small tests
+- label: Tensorizer, Metrics, Tracings test
   fast_check: true
   commands:
   - apt-get install curl libsodium23 && pytest -v -s tensorizer_loader # Tensorizer
@@ -52,6 +52,30 @@ steps:
   - pip install -r requirements-docs.txt
   - SPHINXOPTS=\"-W\" make html
 
+- label: Regression Test
+  mirror_hardwares: [amd]
+  command: pytest -v -s test_regression.py
+  working_dir: "/vllm-workspace/tests" # optional
+
+- label: AsyncEngine Test
+  #mirror_hardwares: [amd]
+  command: pytest -v -s async_engine
+
+- label: Basic Correctness Test
+  mirror_hardwares: [amd]
+  commands:
+  - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py
+  - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py
+  - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
+  - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
+  - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
+
+- label: Core Test
+  mirror_hardwares: [amd]
+  commands: 
+  - pytest -v -s core
+  - pytest -v -s distributed/test_parallel_state.py
+
 - label: Distributed Comm Ops Test
   #mirror_hardwares: [amd]
   working_dir: "/vllm-workspace/tests"
@@ -60,19 +84,6 @@ steps:
   - pytest -v -s distributed/test_comm_ops.py
   - pytest -v -s distributed/test_shm_broadcast.py
 
-- label: Distributed Tests (4 GPUs)
-  #mirror_hardwares: [amd]
-  working_dir: "/vllm-workspace/tests"
-  fast_check: true
-  num_gpus: 4
-  commands:
-  - pytest -v -s distributed/test_pynccl.py
-  # We want to test that models which use 2 GPUs work with 4 GPUs, which is why we duplicate them here.
-  # See https://github.com/vllm-project/vllm/pull/5473#issuecomment-2166601837 for context.
-  - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
-  - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py
-  - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py
-
 - label: Distributed Tests (2 GPUs)
   mirror_hardwares: [amd]
   working_dir: "/vllm-workspace/tests"
@@ -96,9 +107,42 @@ steps:
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py
 
+- label: Distributed Tests (4 GPUs)
+  #mirror_hardwares: [amd]
+  working_dir: "/vllm-workspace/tests"
+  num_gpus: 4
+  commands:
+  - pytest -v -s distributed/test_pynccl.py
+  # We want to test that models which use 2 GPUs work with 4 GPUs, which is why we duplicate them here.
+  # See https://github.com/vllm-project/vllm/pull/5473#issuecomment-2166601837 for context.
+  - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
+  - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py
+  - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py
+
+- label: Pipeline Parallelism Test
+  working_dir: "/vllm-workspace/tests"
+  num_gpus: 4
+  commands:
+  - TP_SIZE=2 PP_SIZE=2 EAGER_MODE=1 CHUNKED_PREFILL=1 pytest -v -s distributed/test_pipeline_parallel.py
+  - TP_SIZE=2 PP_SIZE=2 EAGER_MODE=1 CHUNKED_PREFILL=0 pytest -v -s distributed/test_pipeline_parallel.py
+  - TP_SIZE=1 PP_SIZE=3 EAGER_MODE=1 CHUNKED_PREFILL=0 pytest -v -s distributed/test_pipeline_parallel.py
+  - PP_SIZE=4 EAGER_MODE=1 CHUNKED_PREFILL=1 pytest -v -s distributed/test_pipeline_parallel.py
+  - PP_SIZE=4 EAGER_MODE=1 CHUNKED_PREFILL=0 pytest -v -s distributed/test_pipeline_parallel.py
+
+
 - label: Engine Test
   mirror_hardwares: [amd]
-  command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py
+  commands: 
+  - pytest -v -s engine test_sequence.py test_config.py test_logger.py
+  # OOM in the CI unless we run this separately
+  - pytest -v -s tokenization
+
+- label: Entrypoints Test
+  mirror_hardwares: [amd]
+
+  commands:
+  - pytest -v -s entrypoints/llm
+  - pytest -v -s entrypoints/openai
 
 - label: Examples Test
   working_dir: "/vllm-workspace/examples"
@@ -113,14 +157,24 @@ steps:
     - python3 llava_example.py
     - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
 
+- label: Inputs Test
+  #mirror_hardwares: [amd]
+  commands:
+    - bash ../.buildkite/download-images.sh
+    - pytest -v -s test_inputs.py
+    - pytest -v -s multimodal
+
 - label: Kernels Test %N
   #mirror_hardwares: [amd]
-  command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
+  commands:
+    - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.7/flashinfer-0.0.7+cu121torch2.3-cp310-cp310-linux_x86_64.whl
+    - pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
   parallelism: 4
 
 - label: Models Test
   #mirror_hardwares: [amd]
   commands:
+    - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.7/flashinfer-0.0.7+cu121torch2.3-cp310-cp310-linux_x86_64.whl
     - pytest -v -s models -m \"not vlm\"
 
 - label: Vision Language Models Test
@@ -142,6 +196,13 @@ steps:
   mirror_hardwares: [amd]
   command: pytest -v -s test_logits_processor.py
 
+- label: Utils Test
+  command: pytest -v -s test_utils.py
+
+- label: Worker Test
+  mirror_hardwares: [amd]
+  command: pytest -v -s worker
+
 - label: Speculative decoding tests
   #mirror_hardwares: [amd]
   commands:
@@ -164,11 +225,26 @@ steps:
     - export VLLM_WORKER_MULTIPROC_METHOD=spawn
     - pytest -v -s -x lora/test_long_context.py
 
+- label: Tensorizer Test
+  #mirror_hardwares: [amd]
+  command: apt-get install curl libsodium23 && pytest -v -s tensorizer_loader
+
+- label: Metrics Test
+  mirror_hardwares: [amd]
+  command: pytest -v -s metrics
 
 - label: Quantization Test
   #mirror_hardwares: [amd]
   command: pytest -v -s quantization
 
+- label: Tracing Test
+  commands: 
+    - "pip install \
+        opentelemetry-sdk \
+        opentelemetry-api \
+        opentelemetry-exporter-otlp \
+        opentelemetry-semantic-conventions-ai"
+    - pytest -v -s tracing
 
 - label: Benchmarks
   working_dir: "/vllm-workspace/.buildkite"
@@ -184,6 +260,22 @@ steps:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - bash ./run-tests.sh -c configs/models-small.txt -t 1
 
+- label: LM Eval Large Models
+  gpu: a100
+  num_gpus: 4
+  working_dir: "/vllm-workspace/.buildkite/lm-eval-harness"
+  commands:
+  - pip install lm-eval
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - bash ./run-tests.sh -c configs/models-large.txt -t 4
+
+- label: Documentation Build
+  working_dir: "/vllm-workspace/test_docs/docs"
+  no_gpu: True
+  commands:
+  - pip install -r requirements-docs.txt
+  - SPHINXOPTS=\"-W\" make html
+
 - label: Distributed Tests (A100)
   gpu: a100
   num_gpus: 4
@@ -193,7 +285,7 @@ steps:
   - pytest -v -s distributed/test_custom_all_reduce.py
   - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
   - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py
-  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.5/flashinfer-0.0.5+cu121torch2.3-cp310-cp310-linux_x86_64.whl
+  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.7/flashinfer-0.0.7+cu121torch2.3-cp310-cp310-linux_x86_64.whl
   - VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
   - VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=meta-llama/Meta-Llama-3-8B DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py
   - pytest -v -s -x lora/test_mixtral.py
diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2
index fa25e3d4413ba..4cc53d57c75c1 100644
--- a/.buildkite/test-template-fastcheck.j2
+++ b/.buildkite/test-template-fastcheck.j2
@@ -137,10 +137,11 @@ steps:
   {% endfor %}
 
   - block: "Run AMD tests"
+    key: block-amd-tests
     depends_on: ~
 
   - group: "AMD Tests"
-    depends_on: ~
+    depends_on: block-amd-tests
     steps:
     {% for step in steps %}
     {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}

From e79d31f464343bd7e2b2757f5507449e799bb7fc Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Thu, 11 Jul 2024 23:45:49 +0000
Subject: [PATCH 12/20] ci: split combined fast-check step, build image in fastcheck template

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-pipeline.yaml         | 15 ++----
 .buildkite/test-template-fastcheck.j2 | 72 +++++++++++++++++++++++++--
 2 files changed, 71 insertions(+), 16 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 0f72748338735..99cfc1f5ce17f 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -7,18 +7,6 @@
 
 
 steps:
-- label: Regression, Basic Correctness, Entrypoints test
-  fast_check: true
-  commands:
-  - pytest -v -s test_regression.py # Regression
-  - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py # Basic Correctness
-  - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py
-  - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py
-  - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
-  - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py
-  - pytest -v -s entrypoints/llm # Entrypoints
-  - pytest -v -s entrypoints/openai
-
 - label: Async Engine, Inputs, Utils, Worker test
   fast_check: true
   commands:
@@ -54,6 +42,7 @@ steps:
 
 - label: Regression Test
   mirror_hardwares: [amd]
+  fast_check: true
   command: pytest -v -s test_regression.py
   working_dir: "/vllm-workspace/tests" # optional
 
@@ -63,6 +52,7 @@ steps:
 
 - label: Basic Correctness Test
   mirror_hardwares: [amd]
+  fast_check: true
   commands:
   - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py
   - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py
@@ -138,6 +128,7 @@ steps:
   - pytest -v -s tokenization
 
 - label: Entrypoints Test
+  fast_check: true
   mirror_hardwares: [amd]
 
   commands:
diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2
index 4cc53d57c75c1..57d5b20def7ee 100644
--- a/.buildkite/test-template-fastcheck.j2
+++ b/.buildkite/test-template-fastcheck.j2
@@ -3,6 +3,23 @@
 {% set hf_home = "/root/.cache/huggingface" %}
 
 steps:
+  - label: ":docker: build image"
+    key: image-build
+    agents:
+      queue: cpu_queue
+    commands:
+      - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
+      - "docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --tag {{ docker_image }} --target test --progress plain ."
+      - "docker push {{ docker_image }}"
+    env:
+      DOCKER_BUILDKIT: "1"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 5
+        - exit_status: -10  # Agent was lost
+          limit: 5
+
   - label: "Neuron Test"
     depends_on: ~
     agents:
@@ -23,14 +40,61 @@ steps:
     command: bash .buildkite/run-xpu-test.sh
 
   {% for step in steps %}
-  {% if step.gpu != "a100" %}
-  {% if step.fast_check != true %}
+  {% if step.gpu != "a100" and step.fast_check == true %}
+  - label: "{{ step.label }}"
+    depends_on: image-build
+    priority: 10000
+    agents:
+      {% if step.label == "Documentation Build" %}
+      queue: small_cpu_queue
+      {% elif step.no_gpu %}
+      queue: cpu_queue
+      {% elif step.num_gpus == 2 or step.num_gpus == 4 %}
+      queue: gpu_4_queue
+      {% else %}
+      queue: gpu_1_queue
+      {% endif %}
+    soft_fail: {{ step.soft_fail or false }}
+    {% if step.parallelism %}
+    parallelism: {{ step.parallelism }}
+    {% endif %}
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 5
+        - exit_status: -10  # Agent was lost
+          limit: 5
+    plugins:
+      - docker#v5.2.0:
+          image: {{ docker_image }}
+          always-pull: true
+          propagate-environment: true
+          {% if not step.no_gpu %}
+          gpus: all
+          {% endif %}
+          {% if step.label == "Benchmarks" %}
+          mount-buildkite-agent: true
+          {% endif %}
+          command: ["bash", "-c", "cd {{ (step.working_dir or default_working_dir) | safe  }} && {{ step.command  or (step.commands | join(' && ')) | safe }}"]
+          environment:
+            - VLLM_USAGE_SOURCE=ci-test
+            - HF_HOME={{ hf_home }}
+            - HF_TOKEN
+            {% if step.label == "Speculative decoding tests" %}
+            - VLLM_ATTENTION_BACKEND=XFORMERS
+            {% endif %}
+          volumes:
+            - /dev/shm:/dev/shm
+            - {{ hf_home }}:{{ hf_home }}
+  {% endif %}
+  {% endfor %}
+
+  {% for step in steps %}
+  {% if step.gpu != "a100" and step.fast_check != true %}
   - block: "Run {{ step.label }}"
     depends_on: image_build
-  {% endif %}
 
   - label: "{{ step.label }}"
-    priority: 10000
     agents:
       {% if step.label == "Documentation Build" %}
       queue: small_cpu_queue

From 953ee4d0e3df49e5b344530286c233bc801d7edb Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Fri, 12 Jul 2024 00:00:56 +0000
Subject: [PATCH 13/20] ci: add fast_check_only flag, tag fastcheck image with $BUILDKITE_COMMIT

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-pipeline.yaml         | 3 ++-
 .buildkite/test-template-fastcheck.j2 | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 99cfc1f5ce17f..6eb6479e31734 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -9,6 +9,7 @@
 steps:
 - label: Async Engine, Inputs, Utils, Worker test
   fast_check: true
+  fast_check_only: true
   commands:
   - pytest -v -s async_engine # Async Engine
   - bash ../.buildkite/download-images.sh # Inputs 
@@ -17,7 +18,6 @@ steps:
   - pytest -v -s test_utils.py # Utils
   - pytest -v -s worker # Worker
 
-
 - label: Core Test
   mirror_hardwares: [amd]
   fast_check: true
@@ -27,6 +27,7 @@ steps:
 
 - label: Tensorizer, Metrics, Tracings test
   fast_check: true
+  fast_check_only: true
   commands:
   - apt-get install curl libsodium23 && pytest -v -s tensorizer_loader # Tensorizer
   - pytest -v -s metrics # Metrics
diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2
index 57d5b20def7ee..11f2ac594c77b 100644
--- a/.buildkite/test-template-fastcheck.j2
+++ b/.buildkite/test-template-fastcheck.j2
@@ -1,4 +1,4 @@
-{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:0a9bafad36c1c46d62515e5abfe6acde9707bf5f" %}
+{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}
 {% set default_working_dir = "/vllm-workspace/tests" %}
 {% set hf_home = "/root/.cache/huggingface" %}
 

From ef39bef864a566cd4df019d4c8f71940f046ad6d Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Fri, 12 Jul 2024 05:59:05 +0000
Subject: [PATCH 14/20] ci: drop duplicate Core Test step, mark remaining one fast_check

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-pipeline.yaml | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 8e4c2bc08fc6f..9fed7844ba5d3 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -18,13 +18,6 @@ steps:
   - pytest -v -s test_utils.py # Utils
   - pytest -v -s worker # Worker
 
-- label: Core Test
-  mirror_hardwares: [amd]
-  fast_check: true
-  commands: 
-  - pytest -v -s core
-  - pytest -v -s distributed/test_parallel_state.py
-
 - label: Tensorizer, Metrics, Tracings test
   fast_check: true
   fast_check_only: true
@@ -65,6 +58,7 @@ steps:
 
 - label: Core Test
   mirror_hardwares: [amd]
+  fast_check: true
   commands: 
   - pytest -v -s core
   - pytest -v -s distributed/test_parallel_state.py

From 553f5ee080c65a38f9ca599fa171da3aeaa9c258 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Fri, 12 Jul 2024 06:02:12 +0000
Subject: [PATCH 15/20] ci: rename test-template-fastcheck.j2 to test-template-fastcheck2.j2

Signed-off-by: kevin <kevin@anyscale.com>
---
 .../{test-template-fastcheck.j2 => test-template-fastcheck2.j2}   | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename .buildkite/{test-template-fastcheck.j2 => test-template-fastcheck2.j2} (100%)

diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck2.j2
similarity index 100%
rename from .buildkite/test-template-fastcheck.j2
rename to .buildkite/test-template-fastcheck2.j2

From 0526a92c5497130c73d55c9a46ecb74515b8c517 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Fri, 12 Jul 2024 06:08:37 +0000
Subject: [PATCH 16/20] ci: delete test-template-fastcheck2.j2

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-template-fastcheck2.j2 | 221 -------------------------
 1 file changed, 221 deletions(-)
 delete mode 100644 .buildkite/test-template-fastcheck2.j2

diff --git a/.buildkite/test-template-fastcheck2.j2 b/.buildkite/test-template-fastcheck2.j2
deleted file mode 100644
index 11f2ac594c77b..0000000000000
--- a/.buildkite/test-template-fastcheck2.j2
+++ /dev/null
@@ -1,221 +0,0 @@
-{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %}
-{% set default_working_dir = "/vllm-workspace/tests" %}
-{% set hf_home = "/root/.cache/huggingface" %}
-
-steps:
-  - label: ":docker: build image"
-    key: image-build
-    agents:
-      queue: cpu_queue
-    commands:
-      - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
-      - "docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --tag {{ docker_image }} --target test --progress plain ."
-      - "docker push {{ docker_image }}"
-    env:
-      DOCKER_BUILDKIT: "1"
-    retry:
-      automatic:
-        - exit_status: -1  # Agent was lost
-          limit: 5
-        - exit_status: -10  # Agent was lost
-          limit: 5
-
-  - label: "Neuron Test"
-    depends_on: ~
-    agents:
-      queue: neuron
-    command: bash .buildkite/run-neuron-test.sh
-    soft_fail: false
-
-  - label: "Intel CPU Test"
-    depends_on: ~
-    agents:
-      queue: intel-cpu
-    command: bash .buildkite/run-cpu-test.sh
-
-  - label: "Intel GPU Test"
-    depends_on: ~
-    agents:
-      queue: intel-gpu
-    command: bash .buildkite/run-xpu-test.sh
-
-  {% for step in steps %}
-  {% if step.gpu != "a100" and step.fast_check == true %}
-  - label: "{{ step.label }}"
-    depends_on: image-build
-    priority: 10000
-    agents:
-      {% if step.label == "Documentation Build" %}
-      queue: small_cpu_queue
-      {% elif step.no_gpu %}
-      queue: cpu_queue
-      {% elif step.num_gpus == 2 or step.num_gpus == 4 %}
-      queue: gpu_4_queue
-      {% else %}
-      queue: gpu_1_queue
-      {% endif %}
-    soft_fail: {{ step.soft_fail or false }}
-    {% if step.parallelism %}
-    parallelism: {{ step.parallelism }}
-    {% endif %}
-    retry:
-      automatic:
-        - exit_status: -1  # Agent was lost
-          limit: 5
-        - exit_status: -10  # Agent was lost
-          limit: 5
-    plugins:
-      - docker#v5.2.0:
-          image: {{ docker_image }}
-          always-pull: true
-          propagate-environment: true
-          {% if not step.no_gpu %}
-          gpus: all
-          {% endif %}
-          {% if step.label == "Benchmarks" %}
-          mount-buildkite-agent: true
-          {% endif %}
-          command: ["bash", "-c", "cd {{ (step.working_dir or default_working_dir) | safe  }} && {{ step.command  or (step.commands | join(' && ')) | safe }}"]
-          environment:
-            - VLLM_USAGE_SOURCE=ci-test
-            - HF_HOME={{ hf_home }}
-            - HF_TOKEN
-            {% if step.label == "Speculative decoding tests" %}
-            - VLLM_ATTENTION_BACKEND=XFORMERS
-            {% endif %}
-          volumes:
-            - /dev/shm:/dev/shm
-            - {{ hf_home }}:{{ hf_home }}
-  {% endif %}
-  {% endfor %}
-
-  {% for step in steps %}
-  {% if step.gpu != "a100" and step.fast_check != true %}
-  - block: "Run {{ step.label }}"
-    depends_on: image_build
-
-  - label: "{{ step.label }}"
-    agents:
-      {% if step.label == "Documentation Build" %}
-      queue: small_cpu_queue
-      {% elif step.no_gpu %}
-      queue: cpu_queue
-      {% elif step.num_gpus == 2 or step.num_gpus == 4 %}
-      queue: gpu_4_queue
-      {% else %}
-      queue: gpu_1_queue
-      {% endif %}
-    soft_fail: {{ step.soft_fail or false }}
-    {% if step.parallelism %}
-    parallelism: {{ step.parallelism }}
-    {% endif %}
-    retry:
-      automatic:
-        - exit_status: -1  # Agent was lost
-          limit: 5
-        - exit_status: -10  # Agent was lost
-          limit: 5
-    plugins:
-      - docker#v5.2.0:
-          image: {{ docker_image }}
-          always-pull: true
-          propagate-environment: true
-          {% if not step.no_gpu %}
-          gpus: all
-          {% endif %}
-          {% if step.label == "Benchmarks" %}
-          mount-buildkite-agent: true
-          {% endif %}
-          command: ["bash", "-c", "cd {{ (step.working_dir or default_working_dir) | safe  }} && {{ step.command  or (step.commands | join(' && ')) | safe }}"]
-          environment:
-            - VLLM_USAGE_SOURCE=ci-test
-            - HF_HOME={{ hf_home }}
-            - HF_TOKEN
-            {% if step.label == "Speculative decoding tests" %}
-            - VLLM_ATTENTION_BACKEND=XFORMERS
-            {% endif %}
-          volumes:
-            - /dev/shm:/dev/shm
-            - {{ hf_home }}:{{ hf_home }}
-  {% endif %}
-  {% endfor %}
-
-  - block: "Run A100 tests"
-    depends_on: image-build
-
-  {% for step in steps %}
-  {% if step.gpu == "a100" %}
-  - label: "{{ step.label }}"
-    priority: 10000
-    agents:
-      queue: a100-queue
-    soft_fail: {{ step.soft_fail or false }}
-    {% if step.parallelism %}
-    parallelism: {{ step.parallelism }}
-    {% endif %}
-    retry:
-      automatic:
-        - exit_status: -1  # Agent was lost
-          limit: 5
-        - exit_status: -10  # Agent was lost
-          limit: 5
-    plugins:
-    - kubernetes:
-        podSpec:
-          priorityClassName: ci
-          containers:
-          - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:f17f03744ebabed187634baec601ef35094ae14f
-            command: ["bash"]
-            args:
-            - '-c'
-            - "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'"
-            resources:
-              limits:
-                nvidia.com/gpu: {{ step.num_gpus or 1 }}
-            volumeMounts:
-            - name: devshm
-              mountPath: /dev/shm
-            - name: hf-cache
-              mountPath: {{ hf_home }}
-            env:
-            - name: VLLM_USAGE_SOURCE
-              value: ci-test
-            - name: HF_HOME
-              value: {{ hf_home }}
-            - name: HF_TOKEN
-              valueFrom:
-                secretKeyRef:
-                  name: hf-token-secret
-                  key: token
-          nodeSelector:
-            nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB
-          volumes:
-          - name: devshm
-            emptyDir:
-              medium: Memory
-          - name: hf-cache
-            hostPath:
-              path: {{ hf_home }}
-              type: Directory
-  {% endif %}
-  {% endfor %}
-
-  - block: "Run AMD tests"
-    key: block-amd-tests
-    depends_on: ~
-
-  - group: "AMD Tests"
-    depends_on: block-amd-tests
-    steps:
-    {% for step in steps %}
-    {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %}
-      - label: "AMD: {{ step.label }}"
-        agents:
-          queue: amd
-        command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe  }} ; {{ step.command  or (step.commands | join(" && ")) | safe }}"
-        env:
-          DOCKER_BUILDKIT: "1"
-        priority: 100
-        soft_fail: true
-    {% endif %}
-    {% endfor %}

From dddc6b55695e1e435e9b96561e61e0f1f2be7d62 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Fri, 12 Jul 2024 06:53:15 +0000
Subject: [PATCH 17/20] ci: capitalize step labels, move doc build out of combined step

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-pipeline.yaml | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 9fed7844ba5d3..4f5abee304b39 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -7,7 +7,7 @@
 
 
 steps:
-- label: Async Engine, Inputs, Utils, Worker test
+- label: Async Engine, Inputs, Utils, Worker Test
   fast_check: true
   fast_check_only: true
   commands:
@@ -18,7 +18,7 @@ steps:
   - pytest -v -s test_utils.py # Utils
   - pytest -v -s worker # Worker
 
-- label: Tensorizer, Metrics, Tracings test
+- label: Tensorizer, Metrics, Tracing Test
   fast_check: true
   fast_check_only: true
   commands:
@@ -30,9 +30,6 @@ steps:
       opentelemetry-exporter-otlp \
       opentelemetry-semantic-conventions-ai" # Tracings
   - pytest -v -s tracing
-  - cd /vllm-workspace/test_docs/docs
-  - pip install -r requirements-docs.txt
-  - SPHINXOPTS=\"-W\" make html
 
 - label: Regression Test
   mirror_hardwares: [amd]
@@ -260,6 +257,7 @@ steps:
 - label: Documentation Build
   working_dir: "/vllm-workspace/test_docs/docs"
   no_gpu: True
+  fast_check: true
   commands:
   - pip install -r requirements-docs.txt
   - SPHINXOPTS=\"-W\" make html

From c034e205c51bf71088c375acb3b1cb8fc0abe7b8 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Fri, 12 Jul 2024 06:56:05 +0000
Subject: [PATCH 18/20] ci: fold doc build back into combined step, mark 4-GPU step fast_check

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-pipeline.yaml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 4f5abee304b39..7e65cdb7bbfc4 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -7,7 +7,7 @@
 
 
 steps:
-- label: Async Engine, Inputs, Utils, Worker Test
+- label: Async Engine, Inputs, Utils, Worker test
   fast_check: true
   fast_check_only: true
   commands:
@@ -18,7 +18,7 @@ steps:
   - pytest -v -s test_utils.py # Utils
   - pytest -v -s worker # Worker
 
-- label: Tensorizer, Metrics, Tracing Test
+- label: Tensorizer, Metrics, Tracing, Doc Build test
   fast_check: true
   fast_check_only: true
   commands:
@@ -30,6 +30,9 @@ steps:
       opentelemetry-exporter-otlp \
       opentelemetry-semantic-conventions-ai" # Tracings
   - pytest -v -s tracing
+  - cd /vllm-workspace/test_docs/docs # Doc build
+  - pip install -r requirements-docs.txt
+  - SPHINXOPTS=\"-W\" make html
 
 - label: Regression Test
   mirror_hardwares: [amd]
@@ -95,6 +98,7 @@ steps:
   #mirror_hardwares: [amd]
   working_dir: "/vllm-workspace/tests"
   num_gpus: 4
+  fast_check: true
   commands:
   - pytest -v -s distributed/test_pynccl.py
   # We want to test that models which use 2 GPUs work with 4 GPUs, which is why we duplicate them here.
@@ -257,7 +261,6 @@ steps:
 - label: Documentation Build
   working_dir: "/vllm-workspace/test_docs/docs"
   no_gpu: True
-  fast_check: true
   commands:
   - pip install -r requirements-docs.txt
   - SPHINXOPTS=\"-W\" make html

From cb6a198f14501e9b6ad104bd2e809d4f67343989 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Fri, 12 Jul 2024 06:58:50 +0000
Subject: [PATCH 19/20] ci: restore standalone Documentation Build as a fast_check step

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-pipeline.yaml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 7e65cdb7bbfc4..9eb4035d871f2 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -18,7 +18,7 @@ steps:
   - pytest -v -s test_utils.py # Utils
   - pytest -v -s worker # Worker
 
-- label: Tensorizer, Metrics, Tracing, Doc Build test
+- label: Tensorizer, Metrics, Tracingtest
   fast_check: true
   fast_check_only: true
   commands:
@@ -30,9 +30,6 @@ steps:
       opentelemetry-exporter-otlp \
       opentelemetry-semantic-conventions-ai" # Tracings
   - pytest -v -s tracing
-  - cd /vllm-workspace/test_docs/docs # Doc build
-  - pip install -r requirements-docs.txt
-  - SPHINXOPTS=\"-W\" make html
 
 - label: Regression Test
   mirror_hardwares: [amd]
@@ -260,6 +257,7 @@ steps:
 
 - label: Documentation Build
   working_dir: "/vllm-workspace/test_docs/docs"
+  fast_check: true
   no_gpu: True
   commands:
   - pip install -r requirements-docs.txt

From 75801a7873ab16bc624844b07162d67e9dc3cd98 Mon Sep 17 00:00:00 2001
From: kevin <kevin@anyscale.com>
Date: Fri, 12 Jul 2024 07:34:10 +0000
Subject: [PATCH 20/20] ci: fix "Test" spelling in combined step labels and Tracing comment

Signed-off-by: kevin <kevin@anyscale.com>
---
 .buildkite/test-pipeline.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 9eb4035d871f2..9f388b6f8443d 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -7,7 +7,7 @@
 
 
 steps:
-- label: Async Engine, Inputs, Utils, Worker test
+- label: Async Engine, Inputs, Utils, Worker Test
   fast_check: true
   fast_check_only: true
   commands:
@@ -18,7 +18,7 @@ steps:
   - pytest -v -s test_utils.py # Utils
   - pytest -v -s worker # Worker
 
-- label: Tensorizer, Metrics, Tracingtest
+- label: Tensorizer, Metrics, Tracing Test
   fast_check: true
   fast_check_only: true
   commands:
@@ -28,7 +28,7 @@ steps:
       opentelemetry-sdk \
       opentelemetry-api \
       opentelemetry-exporter-otlp \
-      opentelemetry-semantic-conventions-ai" # Tracings
+      opentelemetry-semantic-conventions-ai" # Tracing
   - pytest -v -s tracing
 
 - label: Regression Test