add some guards for pypi push (#327)

1. Added an input argument push_binaries_to_pypi so that binaries are no longer pushed to the neuralmagic pypi unconditionally. It defaults to 'yes' for nightly and 'no' for release; remote-push and weekly runs always pass 'no'. 2. Required all tests to pass before uploading. --------- Co-authored-by: dhuangnm <[email protected]>
neuralmagic · Jun 23, 2024 · 9b2e107 · 9b2e107 · github-actions · Jun 24, 2024
1 parent 046eb08
commit 9b2e107
Show file tree

Hide file tree

Showing 5 changed files with 36 additions and 11 deletions.
diff --git a/.github/workflows/nm-build-test.yml b/.github/workflows/nm-build-test.yml
@@ -7,6 +7,10 @@ on:
         description: "categories: REMOTE, NIGHTLY, WEEKLY, RELEASE"
         type: string
         default: "REMOTE"
+      push_binaries_to_pypi:
+        description: "When set to yes, built wheels and tar.gz will be pushed to neuralmagic pypi if all tests pass"
+        type: string
+        default: 'no'
       python:
         description: "python version, e.g. 3.10.12"
         type: string
@@ -121,17 +125,6 @@ jobs:
     #         test_skip_env_vars: ${{ inputs.test_skip_env_vars }}
     #     secrets: inherit
 
-    UPLOAD:
-        needs: [TEST-SOLO]
-        if: contains(fromJSON('["NIGHTLY", "WEEKLY", "RELEASE"]'), inputs.wf_category)
-        uses: ./.github/workflows/nm-upload-assets-to-gcp.yml
-        with:
-            label: ${{ inputs.build_label }}
-            timeout: ${{ inputs.build_timeout }}
-            gitref: ${{ github.ref }}
-            python: ${{ inputs.python }}
-        secrets: inherit
-
     BENCHMARK:
         needs: [BUILD]
         if: success()
@@ -158,3 +151,14 @@ jobs:
         whl: ${{ needs.BUILD.outputs.whl }}
         lm_eval_configuration: ${{ inputs.lm_eval_configuration }}
       secrets: inherit
+
+    UPLOAD:
+        needs: [TEST-SOLO, BENCHMARK, LM-EVAL-SOLO]
+        if: ${{ contains(fromJSON('["NIGHTLY", "WEEKLY", "RELEASE"]'), inputs.wf_category) && inputs.push_binaries_to_pypi == 'yes' }}
+        uses: ./.github/workflows/nm-upload-assets-to-gcp.yml
+        with:
+            label: ${{ inputs.build_label }}
+            timeout: ${{ inputs.build_timeout }}
+            gitref: ${{ github.ref }}
+            python: ${{ inputs.python }}
+        secrets: inherit
diff --git a/.github/workflows/nm-nightly.yml b/.github/workflows/nm-nightly.yml
@@ -14,6 +14,10 @@ on:
                     - 'true'
                     - 'false'
                 default: 'false'
+            push_binaries_to_pypi:
+                description: "When set to yes, built wheels and tar.gz will be pushed to neuralmagic pypi if all tests pass"
+                type: string
+                default: 'yes'
 
 jobs:
 
@@ -23,6 +27,7 @@ jobs:
             wf_category: NIGHTLY
             python: 3.8.17
             gitref: ${{ github.ref }}
+            push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
@@ -45,6 +50,7 @@ jobs:
             wf_category: NIGHTLY
             python: 3.9.17
             gitref: ${{ github.ref }}
+            push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
@@ -67,6 +73,7 @@ jobs:
             wf_category: NIGHTLY
             python: 3.10.12
             gitref: ${{ github.ref }}
+            push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
@@ -89,6 +96,7 @@ jobs:
             wf_category: NIGHTLY
             python: 3.11.4
             gitref: ${{ github.ref }}
+            push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore

diff --git a/.github/workflows/nm-release.yml b/.github/workflows/nm-release.yml
@@ -10,6 +10,10 @@ on:
           - 'true'
           - 'false'
         default: 'false'
+      push_binaries_to_pypi:
+        description: "When set to yes, built wheels and tar.gz will be pushed to neuralmagic pypi if all tests pass"
+        type: string
+        default: 'no'
 
 jobs:
 
@@ -19,6 +23,7 @@ jobs:
       wf_category: 'RELEASE'
       python: 3.8.17
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -41,6 +46,7 @@ jobs:
       wf_category: 'RELEASE'
       python: 3.9.17
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -63,6 +69,7 @@ jobs:
       wf_category: 'RELEASE'
       python: 3.10.12
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore
@@ -85,6 +92,7 @@ jobs:
       wf_category: 'RELEASE'
       python: 3.11.4
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: ${{ inputs.push_binaries_to_pypi }}
 
       test_label_solo: gcp-k8s-l4-solo
       test_label_multi: ignore

diff --git a/.github/workflows/nm-remote-push.yml b/.github/workflows/nm-remote-push.yml
@@ -17,6 +17,7 @@ jobs:
         with:
             python: 3.8.17
             gitref: ${{ github.ref }}
+            push_binaries_to_pypi: 'no'
 
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
@@ -37,6 +38,7 @@ jobs:
         with:
             python: 3.9.17
             gitref: ${{ github.ref }}
+            push_binaries_to_pypi: 'no'
 
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
@@ -57,6 +59,7 @@ jobs:
         with:
             python: 3.10.12
             gitref: ${{ github.ref }}
+            push_binaries_to_pypi: 'no'
 
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore
@@ -77,6 +80,7 @@ jobs:
         with:
             python: 3.11.4
             gitref: ${{ github.ref }}
+            push_binaries_to_pypi: 'no'
 
             test_label_solo: gcp-k8s-l4-solo
             test_label_multi: ignore

diff --git a/.github/workflows/nm-weekly.yml b/.github/workflows/nm-weekly.yml
@@ -23,6 +23,7 @@ jobs:
       wf_category: WEEKLY
       python: 3.10.12
       gitref: ${{ github.ref }}
+      push_binaries_to_pypi: 'no'
 
       test_label_solo: aws-avx2-32G-a10g-24G
       test_label_multi: aws-avx2-192G-4-a10g-96G
Benchmark suite	Current: `9b2e107`	Previous: `f99739b`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.477164797003392` prompts/s	`2.486273818549482` prompts/s	`1.00`
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`951.2312820493024` tokens/s	`954.7291463230013` tokens/s	`1.00`
Benchmark suite	Current: `9b2e107`	Previous: `f99739b`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.4650892695074256` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`946.5942794908514` tokens/s
Benchmark suite	Current: `9b2e107`	Previous: `f99739b`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.524427780824926` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`969.3802678367716` tokens/s
Benchmark suite	Current: `9b2e107`	Previous: `f99739b`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.477025972502708` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`951.1779734410399` tokens/s