add some guards for pypi push (#327)
1. Added an input argument push_binaries_to_pypi to allow skipping the
automatic push to the neuralmagic pypi. It defaults to 'yes' for nightly
and 'no' for release.

2. Require all tests to pass before uploading.

---------

Co-authored-by: dhuangnm <[email protected]>
dhuangnm authored Jun 23, 2024
1 parent 046eb08 commit 9b2e107
Showing 5 changed files with 36 additions and 11 deletions.
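
The substance of the change, condensed from the nm-build-test.yml diff below: a new push_binaries_to_pypi input gates the UPLOAD job, and UPLOAD now waits on the test, benchmark, and eval jobs instead of only TEST-SOLO. A minimal sketch (not a complete workflow; surrounding inputs and jobs omitted):

    on:
      workflow_call:
        inputs:
          push_binaries_to_pypi:
            description: "When set to yes, built wheels and tar.gz will be pushed to neuralmagic pypi if all tests pass"
            type: string
            default: 'no'
    jobs:
      # UPLOAD runs only for NIGHTLY/WEEKLY/RELEASE runs that opt in, and only
      # after every job it now depends on has succeeded.
      UPLOAD:
        needs: [TEST-SOLO, BENCHMARK, LM-EVAL-SOLO]
        if: ${{ contains(fromJSON('["NIGHTLY", "WEEKLY", "RELEASE"]'), inputs.wf_category) && inputs.push_binaries_to_pypi == 'yes' }}
        uses: ./.github/workflows/nm-upload-assets-to-gcp.yml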
26 changes: 15 additions & 11 deletions .github/workflows/nm-build-test.yml
@@ -7,6 +7,10 @@ on:
         description: "categories: REMOTE, NIGHTLY, WEEKLY, RELEASE"
         type: string
         default: "REMOTE"
+      push_binaries_to_pypi:
+        description: "When set to yes, built wheels and tar.gz will be pushed to neuralmagic pypi if all tests pass"
+        type: string
+        default: 'no'
       python:
         description: "python version, e.g. 3.10.12"
         type: string
@@ -121,17 +125,6 @@ jobs:
 #      test_skip_env_vars: ${{ inputs.test_skip_env_vars }}
 #      secrets: inherit
 
-  UPLOAD:
-    needs: [TEST-SOLO]
-    if: contains(fromJSON('["NIGHTLY", "WEEKLY", "RELEASE"]'), inputs.wf_category)
-    uses: ./.github/workflows/nm-upload-assets-to-gcp.yml
-    with:
-      label: ${{ inputs.build_label }}
-      timeout: ${{ inputs.build_timeout }}
-      gitref: ${{ github.ref }}
-      python: ${{ inputs.python }}
-    secrets: inherit
-
   BENCHMARK:
     needs: [BUILD]
     if: success()
@@ -158,3 +151,14 @@ jobs:
       whl: ${{ needs.BUILD.outputs.whl }}
       lm_eval_configuration: ${{ inputs.lm_eval_configuration }}
     secrets: inherit
+
+  UPLOAD:
+    needs: [TEST-SOLO, BENCHMARK, LM-EVAL-SOLO]
+    if: ${{ contains(fromJSON('["NIGHTLY", "WEEKLY", "RELEASE"]'), inputs.wf_category) && inputs.push_binaries_to_pypi == 'yes' }}
+    uses: ./.github/workflows/nm-upload-assets-to-gcp.yml
+    with:
+      label: ${{ inputs.build_label }}
+      timeout: ${{ inputs.build_timeout }}
+      gitref: ${{ github.ref }}
+      python: ${{ inputs.python }}
+    secrets: inherit
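
A reusable workflow invoked with uses: must declare matching workflow_call inputs. The callee is not part of this diff, so the following is a hypothetical reconstruction of the interface nm-upload-assets-to-gcp.yml would need to expose, inferred only from the with: block above:

    # Hypothetical sketch; the actual nm-upload-assets-to-gcp.yml is not shown in this commit.
    on:
      workflow_call:
        inputs:
          label:      # runner label, fed from inputs.build_label
            type: string
          timeout:    # job timeout, fed from inputs.build_timeout
            type: string
          gitref:     # git ref to upload assets for
            type: string
          python:     # python version string, e.g. 3.10.12
            type: string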
8 changes: 8 additions & 0 deletions .github/workflows/nm-nightly.yml
@@ -14,6 +14,10 @@ on:
           - 'true'
           - 'false'
         default: 'false'
+      push_binaries_to_pypi:
+        description: "When set to yes, built wheels and tar.gz will be pushed to neuralmagic pypi if all tests pass"
+        type: string
+        default: 'yes'
 
 jobs:
 
@@ -23,6 +27,7 @@ jobs:
       wf_category: NIGHTLY
       python: 3.8.17
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -45,6 +50,7 @@ jobs:
       wf_category: NIGHTLY
       python: 3.9.17
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -67,6 +73,7 @@ jobs:
       wf_category: NIGHTLY
       python: 3.10.12
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -89,6 +96,7 @@ jobs:
       wf_category: NIGHTLY
       python: 3.11.4
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
8 changes: 8 additions & 0 deletions .github/workflows/nm-release.yml
@@ -10,6 +10,10 @@ on:
           - 'true'
           - 'false'
         default: 'false'
+      push_binaries_to_pypi:
+        description: "When set to yes, built wheels and tar.gz will be pushed to neuralmagic pypi if all tests pass"
+        type: string
+        default: 'no'
 
 jobs:
 
@@ -19,6 +23,7 @@ jobs:
       wf_category: 'RELEASE'
       python: 3.8.17
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -41,6 +46,7 @@ jobs:
       wf_category: 'RELEASE'
       python: 3.9.17
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -63,6 +69,7 @@ jobs:
       wf_category: 'RELEASE'
       python: 3.10.12
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -85,6 +92,7 @@ jobs:
       wf_category: 'RELEASE'
       python: 3.11.4
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
4 changes: 4 additions & 0 deletions .github/workflows/nm-remote-push.yml
@@ -17,6 +17,7 @@ jobs:
     with:
       python: 3.8.17
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: 'no'
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -37,6 +38,7 @@ jobs:
     with:
      python: 3.9.17
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: 'no'
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -57,6 +59,7 @@ jobs:
     with:
       python: 3.10.12
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: 'no'
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -77,6 +80,7 @@ jobs:
     with:
       python: 3.11.4
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: 'no'
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
1 change: 1 addition & 0 deletions .github/workflows/nm-weekly.yml
@@ -23,6 +23,7 @@ jobs:
       wf_category: WEEKLY
       python: 3.10.12
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: 'no'
 
       test_label_solo: aws-avx2-32G-a10g-24G
       test_label_multi: aws-avx2-192G-4-a10g-96G
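
Net effect of the defaults across the five files above, for quick reference:

    # Summary (read from the diffs above), not additional code:
    #   nm-build-test.yml   declares push_binaries_to_pypi, default 'no'; UPLOAD requires 'yes'
    #   nm-nightly.yml      forwards the input, default 'yes'
    #   nm-release.yml      forwards the input, default 'no'
    #   nm-remote-push.yml  hard-codes 'no'
    #   nm-weekly.yml       hard-codes 'no'
    # A one-off nightly without the push can presumably be started with something like
    #   gh workflow run nm-nightly.yml -f push_binaries_to_pypi=no
    # (assumes the nightly is manually dispatchable; the command is illustrative).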

4 comments on commit 9b2e107

@github-actions
bigger_is_better

Benchmark suite: VLLM Engine throughput - synthetic
  model: NousResearch/Llama-2-7b-chat-hf, max_model_len: 4096
  benchmark_throughput args: use-all-available-gpus, input-len 256, output-len 128, num-prompts 1000
  environment: NVIDIA L4 x 1, vllm 0.5.0, Python 3.9.17 [GCC 11.3.0], torch 2.3.0+cu121

  request_throughput  Current (9b2e107): 2.477164797003392 prompts/s   Previous (f99739b): 2.486273818549482 prompts/s   Ratio: 1.00
  token_throughput    Current (9b2e107): 951.2312820493024 tokens/s    Previous (f99739b): 954.7291463230013 tokens/s    Ratio: 1.00

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
bigger_is_better

Benchmark suite: VLLM Engine throughput - synthetic
  model: NousResearch/Llama-2-7b-chat-hf, max_model_len: 4096
  benchmark_throughput args: use-all-available-gpus, input-len 256, output-len 128, num-prompts 1000
  environment: NVIDIA L4 x 1, vllm 0.5.0, Python 3.8.17 [GCC 11.3.0], torch 2.3.0+cu121

  request_throughput  Current (9b2e107): 2.4650892695074256 prompts/s   Previous (f99739b): n/a   Ratio: n/a
  token_throughput    Current (9b2e107): 946.5942794908514 tokens/s     Previous (f99739b): n/a   Ratio: n/a

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
bigger_is_better

Benchmark suite: VLLM Engine throughput - synthetic
  model: NousResearch/Llama-2-7b-chat-hf, max_model_len: 4096
  benchmark_throughput args: use-all-available-gpus, input-len 256, output-len 128, num-prompts 1000
  environment: NVIDIA L4 x 1, vllm 0.5.0, Python 3.11.4 [GCC 11.3.0], torch 2.3.0+cu121

  request_throughput  Current (9b2e107): 2.524427780824926 prompts/s   Previous (f99739b): n/a   Ratio: n/a
  token_throughput    Current (9b2e107): 969.3802678367716 tokens/s    Previous (f99739b): n/a   Ratio: n/a

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
bigger_is_better

Benchmark suite: VLLM Engine throughput - synthetic
  model: NousResearch/Llama-2-7b-chat-hf, max_model_len: 4096
  benchmark_throughput args: use-all-available-gpus, input-len 256, output-len 128, num-prompts 1000
  environment: NVIDIA L4 x 1, vllm 0.5.0, Python 3.10.12 [GCC 11.3.0], torch 2.3.0+cu121

  request_throughput  Current (9b2e107): 2.477025972502708 prompts/s   Previous (f99739b): n/a   Ratio: n/a
  token_throughput    Current (9b2e107): 951.1779734410399 tokens/s    Previous (f99739b): n/a   Ratio: n/a

This comment was automatically generated by workflow using github-action-benchmark.
