pass the correct config file

neuralmagic · Jun 23, 2024 · df3e138 · df3e138 · github-actions · Jun 24, 2024
1 parent 19163d6
commit df3e138
Show file tree

Hide file tree

Showing 4 changed files with 13 additions and 13 deletions.
diff --git a/.github/workflows/nm-nightly.yml b/.github/workflows/nm-nightly.yml
@@ -36,7 +36,7 @@ jobs:
 
             lm_eval_label_solo: gcp-k8s-l4-solo
             lm_eval_label_multi: gcp-k8s-l4-duo
-            lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.yaml
+            lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.txt
             lm_eval_timeout: 60
         secrets: inherit
 
@@ -59,7 +59,7 @@ jobs:
 
             lm_eval_label_solo: gcp-k8s-l4-solo
             lm_eval_label_multi: gcp-k8s-l4-duo
-            lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.yaml
+            lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.txt
             lm_eval_timeout: 60
         secrets: inherit
 
@@ -82,7 +82,7 @@ jobs:
 
             lm_eval_label_solo: gcp-k8s-l4-solo
             lm_eval_label_multi: gcp-k8s-l4-duo
-            lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.yaml
+            lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.txt
             lm_eval_timeout: 60
         secrets: inherit
 
@@ -106,6 +106,6 @@ jobs:
 
             lm_eval_label_solo: gcp-k8s-l4-solo
             lm_eval_label_multi: gcp-k8s-l4-duo
-            lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.yaml
+            lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.txt
             lm_eval_timeout: 60
         secrets: inherit
diff --git a/.github/workflows/nm-release.yml b/.github/workflows/nm-release.yml
@@ -32,7 +32,7 @@ jobs:
 
       lm_eval_label_solo: gcp-k8s-l4-solo
       lm_eval_label_multi: gcp-k8s-l4-duo
-      lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.yaml
+      lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.txt
       lm_eval_timeout: 60
     secrets: inherit
 
@@ -55,7 +55,7 @@ jobs:
 
       lm_eval_label_solo: gcp-k8s-l4-solo
       lm_eval_label_multi: gcp-k8s-l4-duo
-      lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.yaml
+      lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.txt
       lm_eval_timeout: 60
     secrets: inherit
 
@@ -78,7 +78,7 @@ jobs:
 
       lm_eval_label_solo: gcp-k8s-l4-solo
       lm_eval_label_multi: gcp-k8s-l4-duo
-      lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.yaml
+      lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.txt
       lm_eval_timeout: 60
     secrets: inherit
 
@@ -101,6 +101,6 @@ jobs:
 
       lm_eval_label_solo: gcp-k8s-l4-solo
       lm_eval_label_multi: gcp-k8s-l4-duo
-      lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.yaml
+      lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.txt
       lm_eval_timeout: 60
     secrets: inherit
diff --git a/.github/workflows/nm-remote-push.yml b/.github/workflows/nm-remote-push.yml
@@ -29,7 +29,7 @@ jobs:
 
             lm_eval_label_solo: gcp-k8s-l4-solo
             lm_eval_label_multi: gcp-k8s-l4-duo
-            lm_eval_configuration: ./.github/lm-eval-configs/smoke-small-models.yaml
+            lm_eval_configuration: ./.github/lm-eval-configs/smoke-small-models.txt
             lm_eval_timeout: 60
         secrets: inherit
 
@@ -50,7 +50,7 @@ jobs:
 
             lm_eval_label_solo: gcp-k8s-l4-solo
             lm_eval_label_multi: gcp-k8s-l4-duo
-            lm_eval_configuration: ./.github/lm-eval-configs/smoke-small-models.yaml
+            lm_eval_configuration: ./.github/lm-eval-configs/smoke-small-models.txt
             lm_eval_timeout: 60
         secrets: inherit
 
@@ -71,7 +71,7 @@ jobs:
 
             lm_eval_label_solo: gcp-k8s-l4-solo
             lm_eval_label_multi: gcp-k8s-l4-duo
-            lm_eval_configuration: ./.github/lm-eval-configs/smoke-small-models.yaml
+            lm_eval_configuration: ./.github/lm-eval-configs/smoke-small-models.txt
             lm_eval_timeout: 60
         secrets: inherit
 
@@ -92,6 +92,6 @@ jobs:
 
             lm_eval_label_solo: gcp-k8s-l4-solo
             lm_eval_label_multi: gcp-k8s-l4-duo
-            lm_eval_configuration: ./.github/lm-eval-configs/smoke-small-models.yaml
+            lm_eval_configuration: ./.github/lm-eval-configs/smoke-small-models.txt
             lm_eval_timeout: 60
         secrets: inherit
diff --git a/.github/workflows/nm-weekly.yml b/.github/workflows/nm-weekly.yml
@@ -36,6 +36,6 @@ jobs:
 
       lm_eval_label_solo: gcp-k8s-l4-solo
       lm_eval_label_multi: gcp-k8s-l4-duo
-      lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.yaml
+      lm_eval_configuration: ./.github/lm-eval-configs/full-small-models.txt
       lm_eval_timeout: 60
     secrets: inherit
Benchmark suite	Current: `df3e138`	Previous: `f99739b`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.464738765365027` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`946.4596859001704` tokens/s
Benchmark suite	Current: `df3e138`	Previous: `f99739b`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.5349917547061427` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.11.4 (main, Jun 7 2023, 11:01:02) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`973.4368338071588` tokens/s
Benchmark suite	Current: `df3e138`	Previous: `f99739b`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.459243133911894` prompts/s	`2.486273818549482` prompts/s	`1.01`
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.9.17 (main, Jun 7 2023, 12:34:12) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`944.3493634221674` tokens/s	`954.7291463230013` tokens/s	`1.01`
Benchmark suite	Current: `df3e138`	Previous: `f99739b`	Ratio
`{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`2.4948127653833683` prompts/s
`{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.0", "python_version": "3.8.17 (default, Jun 7 2023, 12:29:56) \n[GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`958.0081019072135` tokens/s