From 7d4ae0bbbaca34476f3ebb6b0c4e2b8a19367aec Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 1 Jul 2024 23:17:15 +0000 Subject: [PATCH 01/20] p Signed-off-by: kevin --- .buildkite/test-template.j2 | 173 ++++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 .buildkite/test-template.j2 diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 new file mode 100644 index 0000000000000..df4c34b0d729a --- /dev/null +++ b/.buildkite/test-template.j2 @@ -0,0 +1,173 @@ +{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %} +{% set default_working_dir = "/vllm-workspace/tests" %} +{% set hf_home = "/root/.cache/huggingface" %} + +steps: + - label: ":docker: build image" + key: image-build + agents: + queue: cpu_queue + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --tag {{ docker_image }} --target test --progress plain ." + - "docker push {{ docker_image }}" + env: + DOCKER_BUILDKIT: "1" + retry: + automatic: + - exit_status: -1 # Agent was lost + limit: 5 + - exit_status: -10 # Agent was lost + limit: 5 + - wait + + - label: "Neuron Test" + depends_on: ~ + agents: + queue: neuron + command: bash .buildkite/run-neuron-test.sh + soft_fail: false + + - label: "Intel CPU Test" + depends_on: ~ + agents: + queue: intel-cpu + command: bash .buildkite/run-cpu-test.sh + + - label: "Intel GPU Test" + depends_on: ~ + agents: + queue: intel-gpu + command: bash .buildkite/run-xpu-test.sh + + {% for step in steps %} + {% if $BUILDKITE_PIPELINE_SLUG == "ci-aws" or ($BUILDKITE_PIPELINE_SLUG == "fastcheck" and {{ step.fast_check or false }}) %} + {% if step.gpu != "a100" %} + - label: "{{ step.label }}" + agents: + {% if step.label == "Documentation Build" %} + queue: small_cpu_queue + {% elif step.no_gpu %} + queue: cpu_queue + {% elif step.num_gpus == 2 or step.num_gpus == 4 %} + queue: gpu_4_queue + {% else %} + queue: gpu_1_queue + {% endif %} + soft_fail: {{ step.soft_fail or false }} + {% if step.parallelism %} + parallelism: {{ step.parallelism }} + {% endif %} + retry: + automatic: + - exit_status: -1 # Agent was lost + limit: 5 + - exit_status: -10 # Agent was lost + limit: 5 + plugins: + - docker#v5.2.0: + image: {{ docker_image }} + always-pull: true + propagate-environment: true + {% if not step.no_gpu %} + gpus: all + {% endif %} + {% if step.label == "Benchmarks" %} + mount-buildkite-agent: true + {% endif %} + command: ["bash", "-c", "cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}"] + environment: + - VLLM_USAGE_SOURCE=ci-test + - HF_HOME={{ hf_home }} + - HF_TOKEN + {% if step.label == "Speculative decoding tests" %} + - VLLM_ATTENTION_BACKEND=XFORMERS + {% endif %} + volumes: + - /dev/shm:/dev/shm + - {{ hf_home }}:{{ hf_home }} + {% endif %} + {% endif %} + {% endfor %} + + - block: "Run A100 tests" + depends_on: image-build + + {% for step in steps %} + {% if step.gpu == "a100" %} + - label: "{{ step.label }}" + priority: 10000 + agents: + queue: a100-queue + soft_fail: {{ step.soft_fail or false }} + {% if step.parallelism %} + parallelism: {{ step.parallelism }} + {% endif %} + retry: + automatic: + - exit_status: -1 # Agent was lost + limit: 5 + - exit_status: -10 # Agent was lost + limit: 5 + plugins: + - kubernetes: + podSpec: + priorityClassName: ci + containers: + - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:f17f03744ebabed187634baec601ef35094ae14f + command: ["bash"] + args: + - '-c' + - "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'" + resources: + limits: + nvidia.com/gpu: {{ step.num_gpus or 1 }} + volumeMounts: + - name: devshm + mountPath: /dev/shm + - name: hf-cache + mountPath: {{ hf_home }} + env: + - name: VLLM_USAGE_SOURCE + value: ci-test + - name: HF_HOME + value: {{ hf_home }} + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: hf-token-secret + key: token + nodeSelector: + nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB + volumes: + - name: devshm + emptyDir: + medium: Memory + - name: hf-cache + hostPath: + path: {{ hf_home }} + type: Directory + + {% if $BUILDKITE_PIPELINE_SLUG == "fastcheck" %} + - block: "Run AMD tests" + depends_on: ~ + {% endif %} + + - group: "AMD Tests" + {% if $BUILDKITE_PIPELINE_SLUG != "fastcheck" %}depends_on: ~{% endif %} + steps: + {% for step in steps %} + {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %} + - label: "AMD: {{ step.label }}" + agents: + queue: amd + command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" + env: + DOCKER_BUILDKIT: "1" + priority: 100 + soft_fail: true + {% endif %} + {% endfor %} + + {% endif %} + {% endfor %} \ No newline at end of file From fcf34e6cc0b5d0850c906f1d428595c9751159be Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 1 Jul 2024 23:18:14 +0000 Subject: [PATCH 02/20] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index c51702886f394..2e0867b6fd482 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -8,6 +8,7 @@ steps: - label: Regression Test + fastcheck: true mirror_hardwares: [amd] command: pytest -v -s test_regression.py working_dir: "/vllm-workspace/tests" # optional @@ -15,6 +16,7 @@ steps: - label: AsyncEngine Test #mirror_hardwares: [amd] command: pytest -v -s async_engine + fastcheck: true - label: Basic Correctness Test mirror_hardwares: [amd] From 0c6d7b3850f1c5721aea68f0fc2a05e7c4d31b05 Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 1 Jul 2024 23:23:28 +0000 Subject: [PATCH 03/20] p Signed-off-by: kevin --- .buildkite/test-template.j2 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index df4c34b0d729a..c322d75db3411 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -41,7 +41,7 @@ steps: command: bash .buildkite/run-xpu-test.sh {% for step in steps %} - {% if $BUILDKITE_PIPELINE_SLUG == "ci-aws" or ($BUILDKITE_PIPELINE_SLUG == "fastcheck" and {{ step.fast_check or false }}) %} + {% if env.BUILDKITE_PIPELINE_SLUG == "ci-aws" or (env.BUILDKITE_PIPELINE_SLUG == "fastcheck" and step.fast_check|default(false)) %} {% if step.gpu != "a100" %} - label: "{{ step.label }}" agents: @@ -148,13 +148,12 @@ steps: path: {{ hf_home }} type: Directory - {% if $BUILDKITE_PIPELINE_SLUG == "fastcheck" %} + {% if env.BUILDKITE_PIPELINE_SLUG == "fastcheck" %} - block: "Run AMD tests" depends_on: ~ {% endif %} - group: "AMD Tests" - {% if $BUILDKITE_PIPELINE_SLUG != "fastcheck" %}depends_on: ~{% endif %} steps: {% for step in steps %} {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %} @@ -168,6 +167,7 @@ steps: soft_fail: true {% endif %} {% endfor %} + {% if env.BUILDKITE_PIPELINE_SLUG != "fastcheck" %}depends_on: ~{% endif %} {% endif %} {% endfor %} \ No newline at end of file From 3d57156a9251935aebe61404a7f87593c32eee06 Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 1 Jul 2024 23:29:28 +0000 Subject: [PATCH 04/20] p Signed-off-by: kevin --- .buildkite/test-template.j2 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2 index c322d75db3411..091a4feb61901 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template.j2 @@ -41,7 +41,7 @@ steps: command: bash .buildkite/run-xpu-test.sh {% for step in steps %} - {% if env.BUILDKITE_PIPELINE_SLUG == "ci-aws" or (env.BUILDKITE_PIPELINE_SLUG == "fastcheck" and step.fast_check|default(false)) %} + {% if env["BUILDKITE_PIPELINE_SLUG"] == "ci-aws" or (env["BUILDKITE_PIPELINE_SLUG"] == "fastcheck" and step.fast_check|default(false)) %} {% if step.gpu != "a100" %} - label: "{{ step.label }}" agents: @@ -148,7 +148,7 @@ steps: path: {{ hf_home }} type: Directory - {% if env.BUILDKITE_PIPELINE_SLUG == "fastcheck" %} + {% if env["BUILDKITE_PIPELINE_SLUG"] == "fastcheck" %} - block: "Run AMD tests" depends_on: ~ {% endif %} @@ -167,7 +167,7 @@ steps: soft_fail: true {% endif %} {% endfor %} - {% if env.BUILDKITE_PIPELINE_SLUG != "fastcheck" %}depends_on: ~{% endif %} + {% if env["BUILDKITE_PIPELINE_SLUG"] != "fastcheck" %}depends_on: ~{% endif %} {% endif %} {% endfor %} \ No newline at end of file From d6579e39b5303455d35263ee7df3aab313f9000d Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 1 Jul 2024 23:55:55 +0000 Subject: [PATCH 05/20] p Signed-off-by: kevin --- ...template.j2 => test-template-fastcheck.j2} | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) rename .buildkite/{test-template.j2 => test-template-fastcheck.j2} (94%) diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template-fastcheck.j2 similarity index 94% rename from .buildkite/test-template.j2 rename to .buildkite/test-template-fastcheck.j2 index 091a4feb61901..c02efe675f6af 100644 --- a/.buildkite/test-template.j2 +++ b/.buildkite/test-template-fastcheck.j2 @@ -21,6 +21,25 @@ steps: limit: 5 - wait + - block: "Run AMD tests" + depends_on: ~ + + - group: "AMD Tests" + depends_on: ~ + steps: + {% for step in steps %} + {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %} + - label: "AMD: {{ step.label }}" + agents: + queue: amd + command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" + env: + DOCKER_BUILDKIT: "1" + priority: 100 + soft_fail: true + {% endif %} + {% endfor %} + - label: "Neuron Test" depends_on: ~ agents: @@ -41,8 +60,12 @@ steps: command: bash .buildkite/run-xpu-test.sh {% for step in steps %} - {% if env["BUILDKITE_PIPELINE_SLUG"] == "ci-aws" or (env["BUILDKITE_PIPELINE_SLUG"] == "fastcheck" and step.fast_check|default(false)) %} {% if step.gpu != "a100" %} + {% if step.fast_check != "true" %} + - block: "Run {{ step.label }}" + depends_on: image_build + {% endif %} + - label: "{{ step.label }}" agents: {% if step.label == "Documentation Build" %} @@ -87,7 +110,6 @@ steps: - /dev/shm:/dev/shm - {{ hf_home }}:{{ hf_home }} {% endif %} - {% endif %} {% endfor %} - block: "Run A100 tests" @@ -147,27 +169,5 @@ steps: hostPath: path: {{ hf_home }} type: Directory - - {% if env["BUILDKITE_PIPELINE_SLUG"] == "fastcheck" %} - - block: "Run AMD tests" - depends_on: ~ {% endif %} - - - group: "AMD Tests" - steps: - {% for step in steps %} - {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %} - - label: "AMD: {{ step.label }}" - agents: - queue: amd - command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" - env: - DOCKER_BUILDKIT: "1" - priority: 100 - soft_fail: true - {% endif %} - {% endfor %} - {% if env["BUILDKITE_PIPELINE_SLUG"] != "fastcheck" %}depends_on: ~{% endif %} - - {% endif %} - {% endfor %} \ No newline at end of file + {% endfor %} From fd54c9832c9b8b3636c8e4e6f989eaa9d5672be6 Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 1 Jul 2024 23:59:27 +0000 Subject: [PATCH 06/20] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 2e0867b6fd482..a34cc0717e16d 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -8,7 +8,7 @@ steps: - label: Regression Test - fastcheck: true + fast_check: true mirror_hardwares: [amd] command: pytest -v -s test_regression.py working_dir: "/vllm-workspace/tests" # optional @@ -16,7 +16,7 @@ steps: - label: AsyncEngine Test #mirror_hardwares: [amd] command: pytest -v -s async_engine - fastcheck: true + fast_check: true - label: Basic Correctness Test mirror_hardwares: [amd] From fcdf09cdbeb4e654e0ae79bc50012904c1063c33 Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 2 Jul 2024 00:05:39 +0000 Subject: [PATCH 07/20] P Signed-off-by: kevin --- .buildkite/test-template-fastcheck.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2 index c02efe675f6af..2862d518f449c 100644 --- a/.buildkite/test-template-fastcheck.j2 +++ b/.buildkite/test-template-fastcheck.j2 @@ -61,7 +61,7 @@ steps: {% for step in steps %} {% if step.gpu != "a100" %} - {% if step.fast_check != "true" %} + {% if step.fast_check != true %} - block: "Run {{ step.label }}" depends_on: image_build {% endif %} From c8c9e2c2f98334d16628161d4b7e042385073570 Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 2 Jul 2024 00:19:11 +0000 Subject: [PATCH 08/20] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 113 ++++++++++++++-------------------- 1 file changed, 45 insertions(+), 68 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index a34cc0717e16d..5a42de8658e76 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -7,32 +7,51 @@ steps: -- label: Regression Test +- label: E2E tests fast_check: true - mirror_hardwares: [amd] - command: pytest -v -s test_regression.py - working_dir: "/vllm-workspace/tests" # optional - -- label: AsyncEngine Test - #mirror_hardwares: [amd] - command: pytest -v -s async_engine - fast_check: true - -- label: Basic Correctness Test - mirror_hardwares: [amd] commands: - - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py + - pytest -v -s test_regression.py # Regression + - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py # Basic Correctness - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py + - pytest -v -s entrypoints/llm # Entrypoints + - pytest -v -s entrypoints/openai + +- label: Unit tests + fast_check: true + commands: + - pytest -v -s async_engine # Async Engine + - bash ../.buildkite/download-images.sh # Inputs + - pytest -v -s test_inputs.py + - pytest -v -s multimodal + - pytest -v -s test_utils.py # Utils + - pytest -v -s worker # Worker + - label: Core Test mirror_hardwares: [amd] + fast_check: true commands: - pytest -v -s core - pytest -v -s distributed/test_parallel_state.py +- label: Other small tests + fast_check: true + commands: + - apt-get install curl libsodium23 && pytest -v -s tensorizer_loader # Tensorizer + - pytest -v -s metrics # Metrics + - "pip install \ + opentelemetry-sdk \ + opentelemetry-api \ + opentelemetry-exporter-otlp \ + opentelemetry-semantic-conventions-ai" # Tracings + - pytest -v -s tracing + - cd /vllm-workspace/test_docs/docs + - pip install -r requirements-docs.txt + - SPHINXOPTS=\"-W\" make html + - label: Distributed Comm Ops Test #mirror_hardwares: [amd] working_dir: "/vllm-workspace/tests" @@ -41,6 +60,19 @@ steps: - pytest -v -s distributed/test_comm_ops.py - pytest -v -s distributed/test_shm_broadcast.py +- label: Distributed Tests (4 GPUs) + #mirror_hardwares: [amd] + working_dir: "/vllm-workspace/tests" + fast_check: true + num_gpus: 4 + commands: + - pytest -v -s distributed/test_pynccl.py + # We want to test that models which use 2 GPUs work with 4 GPUs, which is why we duplicate them here. + # See https://github.com/vllm-project/vllm/pull/5473#issuecomment-2166601837 for context. + - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py + - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py + - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py + - label: Distributed Tests (2 GPUs) mirror_hardwares: [amd] working_dir: "/vllm-workspace/tests" @@ -64,29 +96,10 @@ steps: - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py -- label: Distributed Tests (4 GPUs) - #mirror_hardwares: [amd] - working_dir: "/vllm-workspace/tests" - num_gpus: 4 - commands: - - pytest -v -s distributed/test_pynccl.py - # We want to test that models which use 2 GPUs work with 4 GPUs, which is why we duplicate them here. - # See https://github.com/vllm-project/vllm/pull/5473#issuecomment-2166601837 for context. - - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py - - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py - - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py - - label: Engine Test mirror_hardwares: [amd] command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py -- label: Entrypoints Test - mirror_hardwares: [amd] - - commands: - - pytest -v -s entrypoints/llm - - pytest -v -s entrypoints/openai - - label: Examples Test working_dir: "/vllm-workspace/examples" mirror_hardwares: [amd] @@ -100,13 +113,6 @@ steps: - python3 llava_example.py - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors -- label: Inputs Test - #mirror_hardwares: [amd] - commands: - - bash ../.buildkite/download-images.sh - - pytest -v -s test_inputs.py - - pytest -v -s multimodal - - label: Kernels Test %N #mirror_hardwares: [amd] command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT @@ -136,13 +142,6 @@ steps: mirror_hardwares: [amd] command: pytest -v -s test_logits_processor.py -- label: Utils Test - command: pytest -v -s test_utils.py - -- label: Worker Test - mirror_hardwares: [amd] - command: pytest -v -s worker - - label: Speculative decoding tests #mirror_hardwares: [amd] commands: @@ -165,26 +164,11 @@ steps: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s -x lora/test_long_context.py -- label: Tensorizer Test - #mirror_hardwares: [amd] - command: apt-get install curl libsodium23 && pytest -v -s tensorizer_loader - -- label: Metrics Test - mirror_hardwares: [amd] - command: pytest -v -s metrics - label: Quantization Test #mirror_hardwares: [amd] command: pytest -v -s quantization -- label: Tracing Test - commands: - - "pip install \ - opentelemetry-sdk \ - opentelemetry-api \ - opentelemetry-exporter-otlp \ - opentelemetry-semantic-conventions-ai" - - pytest -v -s tracing - label: Benchmarks working_dir: "/vllm-workspace/.buildkite" @@ -200,13 +184,6 @@ steps: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - bash ./run-tests.sh -c configs/models-small.txt -t 1 -- label: Documentation Build - working_dir: "/vllm-workspace/test_docs/docs" - no_gpu: True - commands: - - pip install -r requirements-docs.txt - - SPHINXOPTS=\"-W\" make html - - label: Distributed Tests (A100) gpu: a100 num_gpus: 4 From 0a9bafad36c1c46d62515e5abfe6acde9707bf5f Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 2 Jul 2024 00:22:56 +0000 Subject: [PATCH 09/20] p Signed-off-by: kevin --- .buildkite/test-template-fastcheck.j2 | 38 +++++++++++++-------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2 index 2862d518f449c..a3e4cf6b00d4b 100644 --- a/.buildkite/test-template-fastcheck.j2 +++ b/.buildkite/test-template-fastcheck.j2 @@ -21,25 +21,6 @@ steps: limit: 5 - wait - - block: "Run AMD tests" - depends_on: ~ - - - group: "AMD Tests" - depends_on: ~ - steps: - {% for step in steps %} - {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %} - - label: "AMD: {{ step.label }}" - agents: - queue: amd - command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" - env: - DOCKER_BUILDKIT: "1" - priority: 100 - soft_fail: true - {% endif %} - {% endfor %} - - label: "Neuron Test" depends_on: ~ agents: @@ -171,3 +152,22 @@ steps: type: Directory {% endif %} {% endfor %} + + - block: "Run AMD tests" + depends_on: ~ + + - group: "AMD Tests" + depends_on: ~ + steps: + {% for step in steps %} + {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %} + - label: "AMD: {{ step.label }}" + agents: + queue: amd + command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" + env: + DOCKER_BUILDKIT: "1" + priority: 100 + soft_fail: true + {% endif %} + {% endfor %} From 7f38872652384aef13489f86950e66dae9e4b8aa Mon Sep 17 00:00:00 2001 From: kevin Date: Tue, 2 Jul 2024 00:47:54 +0000 Subject: [PATCH 10/20] p Signed-off-by: kevin --- .buildkite/test-template-fastcheck.j2 | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2 index a3e4cf6b00d4b..fa25e3d4413ba 100644 --- a/.buildkite/test-template-fastcheck.j2 +++ b/.buildkite/test-template-fastcheck.j2 @@ -1,26 +1,8 @@ -{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %} +{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:0a9bafad36c1c46d62515e5abfe6acde9707bf5f" %} {% set default_working_dir = "/vllm-workspace/tests" %} {% set hf_home = "/root/.cache/huggingface" %} steps: - - label: ":docker: build image" - key: image-build - agents: - queue: cpu_queue - commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --tag {{ docker_image }} --target test --progress plain ." - - "docker push {{ docker_image }}" - env: - DOCKER_BUILDKIT: "1" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 5 - - exit_status: -10 # Agent was lost - limit: 5 - - wait - - label: "Neuron Test" depends_on: ~ agents: @@ -48,6 +30,7 @@ steps: {% endif %} - label: "{{ step.label }}" + priority: 10000 agents: {% if step.label == "Documentation Build" %} queue: small_cpu_queue From cea26d54371dfc39bb9da62d96442540e5b983a4 Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 11 Jul 2024 23:20:44 +0000 Subject: [PATCH 11/20] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 130 ++++++++++++++++++++++---- .buildkite/test-template-fastcheck.j2 | 3 +- 2 files changed, 113 insertions(+), 20 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 5a42de8658e76..0f72748338735 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -7,7 +7,7 @@ steps: -- label: E2E tests +- label: Regression, Basic Correctness, Entrypoints test fast_check: true commands: - pytest -v -s test_regression.py # Regression @@ -19,7 +19,7 @@ steps: - pytest -v -s entrypoints/llm # Entrypoints - pytest -v -s entrypoints/openai -- label: Unit tests +- label: Async Engine, Inputs, Utils, Worker test fast_check: true commands: - pytest -v -s async_engine # Async Engine @@ -37,7 +37,7 @@ steps: - pytest -v -s core - pytest -v -s distributed/test_parallel_state.py -- label: Other small tests +- label: Tensorizer, Metrics, Tracings test fast_check: true commands: - apt-get install curl libsodium23 && pytest -v -s tensorizer_loader # Tensorizer @@ -52,6 +52,30 @@ steps: - pip install -r requirements-docs.txt - SPHINXOPTS=\"-W\" make html +- label: Regression Test + mirror_hardwares: [amd] + command: pytest -v -s test_regression.py + working_dir: "/vllm-workspace/tests" # optional + +- label: AsyncEngine Test + #mirror_hardwares: [amd] + command: pytest -v -s async_engine + +- label: Basic Correctness Test + mirror_hardwares: [amd] + commands: + - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py + - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py + - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py + - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py + - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py + +- label: Core Test + mirror_hardwares: [amd] + commands: + - pytest -v -s core + - pytest -v -s distributed/test_parallel_state.py + - label: Distributed Comm Ops Test #mirror_hardwares: [amd] working_dir: "/vllm-workspace/tests" @@ -60,19 +84,6 @@ steps: - pytest -v -s distributed/test_comm_ops.py - pytest -v -s distributed/test_shm_broadcast.py -- label: Distributed Tests (4 GPUs) - #mirror_hardwares: [amd] - working_dir: "/vllm-workspace/tests" - fast_check: true - num_gpus: 4 - commands: - - pytest -v -s distributed/test_pynccl.py - # We want to test that models which use 2 GPUs work with 4 GPUs, which is why we duplicate them here. - # See https://github.com/vllm-project/vllm/pull/5473#issuecomment-2166601837 for context. - - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py - - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py - - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py - - label: Distributed Tests (2 GPUs) mirror_hardwares: [amd] working_dir: "/vllm-workspace/tests" @@ -96,9 +107,42 @@ steps: - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s distributed/test_utils.py +- label: Distributed Tests (4 GPUs) + #mirror_hardwares: [amd] + working_dir: "/vllm-workspace/tests" + num_gpus: 4 + commands: + - pytest -v -s distributed/test_pynccl.py + # We want to test that models which use 2 GPUs work with 4 GPUs, which is why we duplicate them here. + # See https://github.com/vllm-project/vllm/pull/5473#issuecomment-2166601837 for context. + - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py + - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py + - pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py + +- label: Pipeline Parallelism Test + working_dir: "/vllm-workspace/tests" + num_gpus: 4 + commands: + - TP_SIZE=2 PP_SIZE=2 EAGER_MODE=1 CHUNKED_PREFILL=1 pytest -v -s distributed/test_pipeline_parallel.py + - TP_SIZE=2 PP_SIZE=2 EAGER_MODE=1 CHUNKED_PREFILL=0 pytest -v -s distributed/test_pipeline_parallel.py + - TP_SIZE=1 PP_SIZE=3 EAGER_MODE=1 CHUNKED_PREFILL=0 pytest -v -s distributed/test_pipeline_parallel.py + - PP_SIZE=4 EAGER_MODE=1 CHUNKED_PREFILL=1 pytest -v -s distributed/test_pipeline_parallel.py + - PP_SIZE=4 EAGER_MODE=1 CHUNKED_PREFILL=0 pytest -v -s distributed/test_pipeline_parallel.py + + - label: Engine Test mirror_hardwares: [amd] - command: pytest -v -s engine tokenization test_sequence.py test_config.py test_logger.py + commands: + - pytest -v -s engine test_sequence.py test_config.py test_logger.py + # OOM in the CI unless we run this separately + - pytest -v -s tokenization + +- label: Entrypoints Test + mirror_hardwares: [amd] + + commands: + - pytest -v -s entrypoints/llm + - pytest -v -s entrypoints/openai - label: Examples Test working_dir: "/vllm-workspace/examples" @@ -113,14 +157,24 @@ steps: - python3 llava_example.py - python3 tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors +- label: Inputs Test + #mirror_hardwares: [amd] + commands: + - bash ../.buildkite/download-images.sh + - pytest -v -s test_inputs.py + - pytest -v -s multimodal + - label: Kernels Test %N #mirror_hardwares: [amd] - command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT + commands: + - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.7/flashinfer-0.0.7+cu121torch2.3-cp310-cp310-linux_x86_64.whl + - pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT parallelism: 4 - label: Models Test #mirror_hardwares: [amd] commands: + - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.7/flashinfer-0.0.7+cu121torch2.3-cp310-cp310-linux_x86_64.whl - pytest -v -s models -m \"not vlm\" - label: Vision Language Models Test @@ -142,6 +196,13 @@ steps: mirror_hardwares: [amd] command: pytest -v -s test_logits_processor.py +- label: Utils Test + command: pytest -v -s test_utils.py + +- label: Worker Test + mirror_hardwares: [amd] + command: pytest -v -s worker + - label: Speculative decoding tests #mirror_hardwares: [amd] commands: @@ -164,11 +225,26 @@ steps: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s -x lora/test_long_context.py +- label: Tensorizer Test + #mirror_hardwares: [amd] + command: apt-get install curl libsodium23 && pytest -v -s tensorizer_loader + +- label: Metrics Test + mirror_hardwares: [amd] + command: pytest -v -s metrics - label: Quantization Test #mirror_hardwares: [amd] command: pytest -v -s quantization +- label: Tracing Test + commands: + - "pip install \ + opentelemetry-sdk \ + opentelemetry-api \ + opentelemetry-exporter-otlp \ + opentelemetry-semantic-conventions-ai" + - pytest -v -s tracing - label: Benchmarks working_dir: "/vllm-workspace/.buildkite" @@ -184,6 +260,22 @@ steps: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - bash ./run-tests.sh -c configs/models-small.txt -t 1 +- label: LM Eval Large Models + gpu: a100 + num_gpus: 4 + working_dir: "/vllm-workspace/.buildkite/lm-eval-harness" + commands: + - pip install lm-eval + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - bash ./run-tests.sh -c configs/models-large.txt -t 4 + +- label: Documentation Build + working_dir: "/vllm-workspace/test_docs/docs" + no_gpu: True + commands: + - pip install -r requirements-docs.txt + - SPHINXOPTS=\"-W\" make html + - label: Distributed Tests (A100) gpu: a100 num_gpus: 4 @@ -193,7 +285,7 @@ steps: - pytest -v -s distributed/test_custom_all_reduce.py - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py - TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=mp pytest -v -s distributed/test_basic_distributed_correctness.py - - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.5/flashinfer-0.0.5+cu121torch2.3-cp310-cp310-linux_x86_64.whl + - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.7/flashinfer-0.0.7+cu121torch2.3-cp310-cp310-linux_x86_64.whl - VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py - VLLM_ATTENTION_BACKEND=FLASHINFER TEST_DIST_MODEL=meta-llama/Meta-Llama-3-8B DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py - pytest -v -s -x lora/test_mixtral.py diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2 index fa25e3d4413ba..4cc53d57c75c1 100644 --- a/.buildkite/test-template-fastcheck.j2 +++ b/.buildkite/test-template-fastcheck.j2 @@ -137,10 +137,11 @@ steps: {% endfor %} - block: "Run AMD tests" + key: block-amd-tests depends_on: ~ - group: "AMD Tests" - depends_on: ~ + depends_on: block-amd-tests steps: {% for step in steps %} {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %} From e79d31f464343bd7e2b2757f5507449e799bb7fc Mon Sep 17 00:00:00 2001 From: kevin Date: Thu, 11 Jul 2024 23:45:49 +0000 Subject: [PATCH 12/20] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 15 ++---- .buildkite/test-template-fastcheck.j2 | 72 +++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 16 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 0f72748338735..99cfc1f5ce17f 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -7,18 +7,6 @@ steps: -- label: Regression, Basic Correctness, Entrypoints test - fast_check: true - commands: - - pytest -v -s test_regression.py # Regression - - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py # Basic Correctness - - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py - - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_chunked_prefill.py - - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py - - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py - - pytest -v -s entrypoints/llm # Entrypoints - - pytest -v -s entrypoints/openai - - label: Async Engine, Inputs, Utils, Worker test fast_check: true commands: @@ -54,6 +42,7 @@ steps: - label: Regression Test mirror_hardwares: [amd] + fast_check: true command: pytest -v -s test_regression.py working_dir: "/vllm-workspace/tests" # optional @@ -63,6 +52,7 @@ steps: - label: Basic Correctness Test mirror_hardwares: [amd] + fast_check: true commands: - VLLM_ATTENTION_BACKEND=XFORMERS pytest -v -s basic_correctness/test_basic_correctness.py - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_basic_correctness.py @@ -138,6 +128,7 @@ steps: - pytest -v -s tokenization - label: Entrypoints Test + fast_check: true mirror_hardwares: [amd] commands: diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2 index 4cc53d57c75c1..57d5b20def7ee 100644 --- a/.buildkite/test-template-fastcheck.j2 +++ b/.buildkite/test-template-fastcheck.j2 @@ -3,6 +3,23 @@ {% set hf_home = "/root/.cache/huggingface" %} steps: + - label: ":docker: build image" + key: image-build + agents: + queue: cpu_queue + commands: + - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" + - "docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --tag {{ docker_image }} --target test --progress plain ." + - "docker push {{ docker_image }}" + env: + DOCKER_BUILDKIT: "1" + retry: + automatic: + - exit_status: -1 # Agent was lost + limit: 5 + - exit_status: -10 # Agent was lost + limit: 5 + - label: "Neuron Test" depends_on: ~ agents: @@ -23,14 +40,61 @@ steps: command: bash .buildkite/run-xpu-test.sh {% for step in steps %} - {% if step.gpu != "a100" %} - {% if step.fast_check != true %} + {% if step.gpu != "a100" and step.fast_check == true %} + - label: "{{ step.label }}" + depends_on: image-build + priority: 10000 + agents: + {% if step.label == "Documentation Build" %} + queue: small_cpu_queue + {% elif step.no_gpu %} + queue: cpu_queue + {% elif step.num_gpus == 2 or step.num_gpus == 4 %} + queue: gpu_4_queue + {% else %} + queue: gpu_1_queue + {% endif %} + soft_fail: {{ step.soft_fail or false }} + {% if step.parallelism %} + parallelism: {{ step.parallelism }} + {% endif %} + retry: + automatic: + - exit_status: -1 # Agent was lost + limit: 5 + - exit_status: -10 # Agent was lost + limit: 5 + plugins: + - docker#v5.2.0: + image: {{ docker_image }} + always-pull: true + propagate-environment: true + {% if not step.no_gpu %} + gpus: all + {% endif %} + {% if step.label == "Benchmarks" %} + mount-buildkite-agent: true + {% endif %} + command: ["bash", "-c", "cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}"] + environment: + - VLLM_USAGE_SOURCE=ci-test + - HF_HOME={{ hf_home }} + - HF_TOKEN + {% if step.label == "Speculative decoding tests" %} + - VLLM_ATTENTION_BACKEND=XFORMERS + {% endif %} + volumes: + - /dev/shm:/dev/shm + - {{ hf_home }}:{{ hf_home }} + {% endif %} + {% endfor %} + + {% for step in steps %} + {% if step.gpu != "a100" and step.fast_check != true %} - block: "Run {{ step.label }}" depends_on: image_build - {% endif %} - label: "{{ step.label }}" - priority: 10000 agents: {% if step.label == "Documentation Build" %} queue: small_cpu_queue From 953ee4d0e3df49e5b344530286c233bc801d7edb Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 12 Jul 2024 00:00:56 +0000 Subject: [PATCH 13/20] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 3 ++- .buildkite/test-template-fastcheck.j2 | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 99cfc1f5ce17f..6eb6479e31734 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -9,6 +9,7 @@ steps: - label: Async Engine, Inputs, Utils, Worker test fast_check: true + fast_check_only: true commands: - pytest -v -s async_engine # Async Engine - bash ../.buildkite/download-images.sh # Inputs @@ -17,7 +18,6 @@ steps: - pytest -v -s test_utils.py # Utils - pytest -v -s worker # Worker - - label: Core Test mirror_hardwares: [amd] fast_check: true @@ -27,6 +27,7 @@ steps: - label: Tensorizer, Metrics, Tracings test fast_check: true + fast_check_only: true commands: - apt-get install curl libsodium23 && pytest -v -s tensorizer_loader # Tensorizer - pytest -v -s metrics # Metrics diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck.j2 index 57d5b20def7ee..11f2ac594c77b 100644 --- a/.buildkite/test-template-fastcheck.j2 +++ b/.buildkite/test-template-fastcheck.j2 @@ -1,4 +1,4 @@ -{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:0a9bafad36c1c46d62515e5abfe6acde9707bf5f" %} +{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %} {% set default_working_dir = "/vllm-workspace/tests" %} {% set hf_home = "/root/.cache/huggingface" %} From ef39bef864a566cd4df019d4c8f71940f046ad6d Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 12 Jul 2024 05:59:05 +0000 Subject: [PATCH 14/20] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 8e4c2bc08fc6f..9fed7844ba5d3 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -18,13 +18,6 @@ steps: - pytest -v -s test_utils.py # Utils - pytest -v -s worker # Worker -- label: Core Test - mirror_hardwares: [amd] - fast_check: true - commands: - - pytest -v -s core - - pytest -v -s distributed/test_parallel_state.py - - label: Tensorizer, Metrics, Tracings test fast_check: true fast_check_only: true @@ -65,6 +58,7 @@ steps: - label: Core Test mirror_hardwares: [amd] + fast_check: true commands: - pytest -v -s core - pytest -v -s distributed/test_parallel_state.py From 553f5ee080c65a38f9ca599fa171da3aeaa9c258 Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 12 Jul 2024 06:02:12 +0000 Subject: [PATCH 15/20] p Signed-off-by: kevin --- .../{test-template-fastcheck.j2 => test-template-fastcheck2.j2} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .buildkite/{test-template-fastcheck.j2 => test-template-fastcheck2.j2} (100%) diff --git a/.buildkite/test-template-fastcheck.j2 b/.buildkite/test-template-fastcheck2.j2 similarity index 100% rename from .buildkite/test-template-fastcheck.j2 rename to .buildkite/test-template-fastcheck2.j2 From 0526a92c5497130c73d55c9a46ecb74515b8c517 Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 12 Jul 2024 06:08:37 +0000 Subject: [PATCH 16/20] p Signed-off-by: kevin --- .buildkite/test-template-fastcheck2.j2 | 221 ------------------------- 1 file changed, 221 deletions(-) delete mode 100644 .buildkite/test-template-fastcheck2.j2 diff --git a/.buildkite/test-template-fastcheck2.j2 b/.buildkite/test-template-fastcheck2.j2 deleted file mode 100644 index 11f2ac594c77b..0000000000000 --- a/.buildkite/test-template-fastcheck2.j2 +++ /dev/null @@ -1,221 +0,0 @@ -{% set docker_image = "public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:$BUILDKITE_COMMIT" %} -{% set default_working_dir = "/vllm-workspace/tests" %} -{% set hf_home = "/root/.cache/huggingface" %} - -steps: - - label: ":docker: build image" - key: image-build - agents: - queue: cpu_queue - commands: - - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --tag {{ docker_image }} --target test --progress plain ." - - "docker push {{ docker_image }}" - env: - DOCKER_BUILDKIT: "1" - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 5 - - exit_status: -10 # Agent was lost - limit: 5 - - - label: "Neuron Test" - depends_on: ~ - agents: - queue: neuron - command: bash .buildkite/run-neuron-test.sh - soft_fail: false - - - label: "Intel CPU Test" - depends_on: ~ - agents: - queue: intel-cpu - command: bash .buildkite/run-cpu-test.sh - - - label: "Intel GPU Test" - depends_on: ~ - agents: - queue: intel-gpu - command: bash .buildkite/run-xpu-test.sh - - {% for step in steps %} - {% if step.gpu != "a100" and step.fast_check == true %} - - label: "{{ step.label }}" - depends_on: image-build - priority: 10000 - agents: - {% if step.label == "Documentation Build" %} - queue: small_cpu_queue - {% elif step.no_gpu %} - queue: cpu_queue - {% elif step.num_gpus == 2 or step.num_gpus == 4 %} - queue: gpu_4_queue - {% else %} - queue: gpu_1_queue - {% endif %} - soft_fail: {{ step.soft_fail or false }} - {% if step.parallelism %} - parallelism: {{ step.parallelism }} - {% endif %} - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 5 - - exit_status: -10 # Agent was lost - limit: 5 - plugins: - - docker#v5.2.0: - image: {{ docker_image }} - always-pull: true - propagate-environment: true - {% if not step.no_gpu %} - gpus: all - {% endif %} - {% if step.label == "Benchmarks" %} - mount-buildkite-agent: true - {% endif %} - command: ["bash", "-c", "cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}"] - environment: - - VLLM_USAGE_SOURCE=ci-test - - HF_HOME={{ hf_home }} - - HF_TOKEN - {% if step.label == "Speculative decoding tests" %} - - VLLM_ATTENTION_BACKEND=XFORMERS - {% endif %} - volumes: - - /dev/shm:/dev/shm - - {{ hf_home }}:{{ hf_home }} - {% endif %} - {% endfor %} - - {% for step in steps %} - {% if step.gpu != "a100" and step.fast_check != true %} - - block: "Run {{ step.label }}" - depends_on: image_build - - - label: "{{ step.label }}" - agents: - {% if step.label == "Documentation Build" %} - queue: small_cpu_queue - {% elif step.no_gpu %} - queue: cpu_queue - {% elif step.num_gpus == 2 or step.num_gpus == 4 %} - queue: gpu_4_queue - {% else %} - queue: gpu_1_queue - {% endif %} - soft_fail: {{ step.soft_fail or false }} - {% if step.parallelism %} - parallelism: {{ step.parallelism }} - {% endif %} - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 5 - - exit_status: -10 # Agent was lost - limit: 5 - plugins: - - docker#v5.2.0: - image: {{ docker_image }} - always-pull: true - propagate-environment: true - {% if not step.no_gpu %} - gpus: all - {% endif %} - {% if step.label == "Benchmarks" %} - mount-buildkite-agent: true - {% endif %} - command: ["bash", "-c", "cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}"] - environment: - - VLLM_USAGE_SOURCE=ci-test - - HF_HOME={{ hf_home }} - - HF_TOKEN - {% if step.label == "Speculative decoding tests" %} - - VLLM_ATTENTION_BACKEND=XFORMERS - {% endif %} - volumes: - - /dev/shm:/dev/shm - - {{ hf_home }}:{{ hf_home }} - {% endif %} - {% endfor %} - - - block: "Run A100 tests" - depends_on: image-build - - {% for step in steps %} - {% if step.gpu == "a100" %} - - label: "{{ step.label }}" - priority: 10000 - agents: - queue: a100-queue - soft_fail: {{ step.soft_fail or false }} - {% if step.parallelism %} - parallelism: {{ step.parallelism }} - {% endif %} - retry: - automatic: - - exit_status: -1 # Agent was lost - limit: 5 - - exit_status: -10 # Agent was lost - limit: 5 - plugins: - - kubernetes: - podSpec: - priorityClassName: ci - containers: - - image: public.ecr.aws/q9t5s3a7/vllm-ci-test-repo:f17f03744ebabed187634baec601ef35094ae14f - command: ["bash"] - args: - - '-c' - - "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'" - resources: - limits: - nvidia.com/gpu: {{ step.num_gpus or 1 }} - volumeMounts: - - name: devshm - mountPath: /dev/shm - - name: hf-cache - mountPath: {{ hf_home }} - env: - - name: VLLM_USAGE_SOURCE - value: ci-test - - name: HF_HOME - value: {{ hf_home }} - - name: HF_TOKEN - valueFrom: - secretKeyRef: - name: hf-token-secret - key: token - nodeSelector: - nvidia.com/gpu.product: NVIDIA-A100-SXM4-80GB - volumes: - - name: devshm - emptyDir: - medium: Memory - - name: hf-cache - hostPath: - path: {{ hf_home }} - type: Directory - {% endif %} - {% endfor %} - - - block: "Run AMD tests" - key: block-amd-tests - depends_on: ~ - - - group: "AMD Tests" - depends_on: block-amd-tests - steps: - {% for step in steps %} - {% if step.mirror_hardwares and "amd" in step.mirror_hardwares %} - - label: "AMD: {{ step.label }}" - agents: - queue: amd - command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" && ")) | safe }}" - env: - DOCKER_BUILDKIT: "1" - priority: 100 - soft_fail: true - {% endif %} - {% endfor %} From dddc6b55695e1e435e9b96561e61e0f1f2be7d62 Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 12 Jul 2024 06:53:15 +0000 Subject: [PATCH 17/20] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 9fed7844ba5d3..4f5abee304b39 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -7,7 +7,7 @@ steps: -- label: Async Engine, Inputs, Utils, Worker test +- label: Async Engine, Inputs, Utils, Worker Test fast_check: true fast_check_only: true commands: @@ -18,7 +18,7 @@ steps: - pytest -v -s test_utils.py # Utils - pytest -v -s worker # Worker -- label: Tensorizer, Metrics, Tracings test +- label: Tensorizer, Metrics, Tracing Test fast_check: true fast_check_only: true commands: @@ -30,9 +30,6 @@ steps: opentelemetry-exporter-otlp \ opentelemetry-semantic-conventions-ai" # Tracings - pytest -v -s tracing - - cd /vllm-workspace/test_docs/docs - - pip install -r requirements-docs.txt - - SPHINXOPTS=\"-W\" make html - label: Regression Test mirror_hardwares: [amd] @@ -260,6 +257,7 @@ steps: - label: Documentation Build working_dir: "/vllm-workspace/test_docs/docs" no_gpu: True + fast_check: true commands: - pip install -r requirements-docs.txt - SPHINXOPTS=\"-W\" make html From c034e205c51bf71088c375acb3b1cb8fc0abe7b8 Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 12 Jul 2024 06:56:05 +0000 Subject: [PATCH 18/20] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 4f5abee304b39..7e65cdb7bbfc4 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -7,7 +7,7 @@ steps: -- label: Async Engine, Inputs, Utils, Worker Test +- label: Async Engine, Inputs, Utils, Worker test fast_check: true fast_check_only: true commands: @@ -18,7 +18,7 @@ steps: - pytest -v -s test_utils.py # Utils - pytest -v -s worker # Worker -- label: Tensorizer, Metrics, Tracing Test +- label: Tensorizer, Metrics, Tracing, Doc Build test fast_check: true fast_check_only: true commands: @@ -30,6 +30,9 @@ steps: opentelemetry-exporter-otlp \ opentelemetry-semantic-conventions-ai" # Tracings - pytest -v -s tracing + - cd /vllm-workspace/test_docs/docs # Doc build + - pip install -r requirements-docs.txt + - SPHINXOPTS=\"-W\" make html - label: Regression Test mirror_hardwares: [amd] @@ -95,6 +98,7 @@ steps: #mirror_hardwares: [amd] working_dir: "/vllm-workspace/tests" num_gpus: 4 + fast_check: true commands: - pytest -v -s distributed/test_pynccl.py # We want to test that models which use 2 GPUs work with 4 GPUs, which is why we duplicate them here. @@ -257,7 +261,6 @@ steps: - label: Documentation Build working_dir: "/vllm-workspace/test_docs/docs" no_gpu: True - fast_check: true commands: - pip install -r requirements-docs.txt - SPHINXOPTS=\"-W\" make html From cb6a198f14501e9b6ad104bd2e809d4f67343989 Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 12 Jul 2024 06:58:50 +0000 Subject: [PATCH 19/20] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 7e65cdb7bbfc4..9eb4035d871f2 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -18,7 +18,7 @@ steps: - pytest -v -s test_utils.py # Utils - pytest -v -s worker # Worker -- label: Tensorizer, Metrics, Tracing, Doc Build test +- label: Tensorizer, Metrics, Tracingtest fast_check: true fast_check_only: true commands: @@ -30,9 +30,6 @@ steps: opentelemetry-exporter-otlp \ opentelemetry-semantic-conventions-ai" # Tracings - pytest -v -s tracing - - cd /vllm-workspace/test_docs/docs # Doc build - - pip install -r requirements-docs.txt - - SPHINXOPTS=\"-W\" make html - label: Regression Test mirror_hardwares: [amd] @@ -260,6 +257,7 @@ steps: - label: Documentation Build working_dir: "/vllm-workspace/test_docs/docs" + fast_check: true no_gpu: True commands: - pip install -r requirements-docs.txt From 75801a7873ab16bc624844b07162d67e9dc3cd98 Mon Sep 17 00:00:00 2001 From: kevin Date: Fri, 12 Jul 2024 07:34:10 +0000 Subject: [PATCH 20/20] p Signed-off-by: kevin --- .buildkite/test-pipeline.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 9eb4035d871f2..9f388b6f8443d 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -7,7 +7,7 @@ steps: -- label: Async Engine, Inputs, Utils, Worker test +- label: Async Engine, Inputs, Utils, Worker Test fast_check: true fast_check_only: true commands: @@ -18,7 +18,7 @@ steps: - pytest -v -s test_utils.py # Utils - pytest -v -s worker # Worker -- label: Tensorizer, Metrics, Tracingtest +- label: Tensorizer, Metrics, Tracing Test fast_check: true fast_check_only: true commands: @@ -28,7 +28,7 @@ steps: opentelemetry-sdk \ opentelemetry-api \ opentelemetry-exporter-otlp \ - opentelemetry-semantic-conventions-ai" # Tracings + opentelemetry-semantic-conventions-ai" # Tracing - pytest -v -s tracing - label: Regression Test