Shard the Kernels and LoRA Buildkite test steps across 4 parallel jobs each.

- test-pipeline.yaml: both steps pass --shard-id / --num-shards from the
  BUILDKITE_PARALLEL_JOB / BUILDKITE_PARALLEL_JOB_COUNT variables and set
  `parallelism: 4`; the Kernels step no longer sets `soft_fail: true`.
- test-template.j2: emit `parallelism` only when the step defines it.
- requirements-dev.txt: add pytest-shard.

NOTE(review): leading whitespace on hunk lines was reconstructed; confirm it
matches the target files before applying.

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 8badc16d0cb75..2c7dd9f304b9d 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -33,9 +33,9 @@ steps:
 - label: Entrypoints Test
   command: pytest -v -s entrypoints
 
-- label: Kernels Test
-  command: pytest -v -s kernels
-  soft_fail: true
+- label: Kernels Test %N
+  command: pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
+  parallelism: 4
 
 - label: Models Test
   commands:
@@ -55,8 +55,9 @@ steps:
 - label: Speculative decoding tests
   command: pytest -v -s spec_decode
 
-- label: LoRA Test
-  command: pytest -v -s lora --forked
+- label: LoRA Test %N
+  command: pytest -v -s lora --forked --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
+  parallelism: 4
 
 - label: Metrics Test
   command: pytest -v -s metrics
diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2
index 7c1cf2b5a9b39..b5853a2f39383 100644
--- a/.buildkite/test-template.j2
+++ b/.buildkite/test-template.j2
@@ -20,6 +20,9 @@ steps:
     agents:
       queue: kubernetes
     soft_fail: {{ step.soft_fail or false }}
+    {% if step.parallelism %}
+    parallelism: {{ step.parallelism }}
+    {% endif %}
     retry:
       automatic:
       - exit_status: -1 # Agent was lost
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 5502c97d014ac..51fa57f068003 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -16,6 +16,7 @@ pytest
 pytest-forked
 pytest-asyncio
 pytest-rerunfailures
+pytest-shard
 httpx
 einops # required for MPT
 openai