Fix nightly - 03/18/2024 (vllm-project#136)
SUMMARY:
Miscellaneous changes to fix the nightly:
 * Benchmarks:
   - Add a benchmark name so that alert triggering is correct
   - Don't skip the `github-action-benchmark` step based on a previous failure
   - Shorten metric names so `github-action-benchmark` stays under the GitHub comment size threshold
 * Nightly-SOLO:
   - Fix the code-coverage artifact name
 * Misc:
   - Add extra information to the `github-action-benchmark` JSON that is useful in the UI (see the JSON sketch below)
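
For reference, a minimal sketch of the record format that `github-action-benchmark`'s `customBiggerIsBetter`/`customSmallerIsBetter` tools consume, including the optional `extra` string that surfaces in the chart tooltip. Field names follow the upstream action's documented custom-JSON format; the metric name, values, and output path below are illustrative only, not taken from this repo:

```python
import json

# github-action-benchmark expects a JSON array with one entry per metric.
record = {
    # Kept short so the generated commit comment stays under GitHub's size limit.
    "name": "llama-2-7b / serving / request_latency_p95",
    "unit": "ms",
    "value": 182.4,
    # Optional free-form string shown in the tooltip of the gh-pages chart UI.
    "extra": "vllm_version=0.3.3, python_version=3.10.12, torch_version=2.1.2",
}

# Illustrative output path only.
with open("smaller_is_better.json", "w") as f:
    json.dump([record], f, indent=2)
```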

TEST PLAN:
Manual testing

---------

Co-authored-by: Varun Sundar Rabindranath <[email protected]>
varun-sundar-rabindranath and Varun Sundar Rabindranath authored Mar 18, 2024
1 parent 800ef55 commit f90ec1c
Showing 4 changed files with 26 additions and 4 deletions.
5 changes: 4 additions & 1 deletion .github/actions/nm-github-action-benchmark/action.yml
@@ -1,6 +1,8 @@
name: nm github-action benchmark
description: 'Use github-action-benchmark to visualize input json'
inputs:
gh_action_benchmark_name:
description: "Name of the benchmark. Metrics are grouped by benchmark names. github_action_benchmark alert-trigger looks for the previous benchmark value in the benchmark-name group on the previous commit"
gh_action_benchmark_json_file_path:
description: "Path to the benchmark json file to upload (Note that this JSON should be in a `github-action-benchmark` consumable format - This is typically the output of neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py)"
type: string
@@ -33,6 +35,7 @@ runs:
- name: push to gh pages
uses: benchmark-action/github-action-benchmark@v1
with:
name: ${{ inputs.gh_action_benchmark_name }}
output-file-path: ${{ inputs.gh_action_benchmark_json_file_path }}
tool: ${{ inputs.gh_action_benchmark_tool }}
gh-pages-branch: ${{ inputs.gh_pages_branch }}
@@ -43,7 +46,7 @@ runs:
# Add a commit comment comparing the current benchmark with the previous.
comment-always: true
# Create an alert when some value has regressed more than 10%
alert-threshold: "10%"
alert-threshold: "110%"
# Mark the workflow as a failure when some alert is triggered
fail-on-alert: true
# Add a commit comment describing what triggered the alert
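
On the `alert-threshold` change above: in `github-action-benchmark`, the threshold is compared against the ratio of the previous result to the current one (inverted for smaller-is-better tools), so "110%" means "alert on a regression of more than 10%", while the old "10%" value would alert on essentially every run. A rough sketch of that comparison, under the assumption that the upstream action's alert logic works as described; the function and numbers are illustrative, not the action's actual code:

```python
def should_alert(prev: float, current: float, bigger_is_better: bool,
                 threshold: float) -> bool:
    """Approximation of github-action-benchmark's alert check: alert when the
    current result is worse than the previous one by more than the threshold."""
    ratio = (prev / current) if bigger_is_better else (current / prev)
    return ratio > threshold

# Old setting, "10%" (threshold=0.10): even a tiny 1% dip triggers an alert.
assert should_alert(prev=100.0, current=99.0, bigger_is_better=True, threshold=0.10)
# New setting, "110%" (threshold=1.10): only a >10% regression triggers an alert.
assert not should_alert(prev=100.0, current=99.0, bigger_is_better=True, threshold=1.10)
assert should_alert(prev=100.0, current=85.0, bigger_is_better=True, threshold=1.10)
```
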
2 changes: 1 addition & 1 deletion .github/workflows/build-test.yml
@@ -139,7 +139,7 @@ jobs:
uses: actions/upload-artifact@v4
if: success() || failure()
with:
name: cc-nm-vllm-html
name: ${{ github.run_id }}-${{ inputs.label }}-cc-nm-vllm-html
path: cc-nm-vllm-html
retention-days: 15

4 changes: 4 additions & 0 deletions .github/workflows/nm-benchmark.yml
@@ -182,7 +182,9 @@ jobs:

- name: nm-github-action-benchmark(bigger_is_better.json)
uses: ./.github/actions/nm-github-action-benchmark
if: success() || failure()
with:
gh_action_benchmark_name: "bigger_is_better"
gh_action_benchmark_json_file_path: "downloads/bigger_is_better.json"
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
@@ -191,7 +193,9 @@

- name: nm-github-action-benchmark(smaller_is_better.json)
uses: ./.github/actions/nm-github-action-benchmark
if: success() || failure()
with:
gh_action_benchmark_name: "smaller_is_better"
gh_action_benchmark_json_file_path: "downloads/smaller_is_better.json"
gh_action_benchmark_tool: "customSmallerIsBetter"
gh_pages_branch: "nm-gh-pages"
19 changes: 17 additions & 2 deletions neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py
@@ -41,23 +41,38 @@ def extra_from_benchmark_result(br: BenchmarkResult) -> dict:
br.get(BenchmarkResult.SCRIPT_NAME_KEY_),
BenchmarkResult.SCRIPT_ARGS_KEY_:
br.get(BenchmarkResult.SCRIPT_ARGS_KEY_),
BenchmarkResult.DATE_KEY_:
br.get(BenchmarkResult.DATE_KEY_),
BenchmarkResult.MODEL_KEY_:
br.get(BenchmarkResult.MODEL_KEY_),
BenchmarkResult.DATASET_KEY_:
br.get(BenchmarkResult.DATASET_KEY_)
}
return extra

@staticmethod
def from_metric_template(metric_template: MetricTemplate, extra: dict):
# Unique names map to unique charts / benchmarks. Pass it as a JSON
# string with enough information so we may deconstruct it at the UI.
# TODO (varun) : Convert all additional information in name into a hash
# if this becomes too cumbersome.
benchmarking_context = \
extra.get(BenchmarkResult.BENCHMARKING_CONTEXT_KEY_)
name = {
"name":
metric_template.key,
BenchmarkResult.DESCRIPTION_KEY_:
extra.get(BenchmarkResult.DESCRIPTION_KEY_),
BenchmarkResult.GPU_DESCRIPTION_KEY_:
extra.get(BenchmarkResult.GPU_DESCRIPTION_KEY_),
BenchmarkResult.BENCHMARKING_CONTEXT_KEY_:
extra.get(BenchmarkResult.BENCHMARKING_CONTEXT_KEY_)
"vllm_version":
benchmarking_context.get("vllm_version"),
"python_version":
benchmarking_context.get("python_version"),
"torch_version":
benchmarking_context.get("torch_version")
}

return GHARecord(name=f"{json.dumps(name)}",
unit=metric_template.unit,
value=metric_template.value,
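
To make the `gha_benchmark_logging.py` change concrete, here is a minimal, self-contained sketch of how each metric's chart name becomes a JSON string that carries only the short context fields (`vllm_version`, `python_version`, `torch_version`) instead of the full benchmarking context. The class and dictionary keys below are simplified stand-ins for `GHARecord`, `MetricTemplate`, and the `BenchmarkResult.*_KEY_` constants, and the example values are made up:

```python
import json
from dataclasses import dataclass


@dataclass
class MetricTemplate:
    key: str      # e.g. "request_throughput"
    unit: str     # e.g. "requests/s"
    value: float


def make_gha_record(metric: MetricTemplate, extra: dict) -> dict:
    """Build a github-action-benchmark record whose name is a JSON string.

    Only a few short fields from the benchmarking context are kept, so the
    commit comment that github-action-benchmark posts stays within GitHub's
    size limit while the UI can still deconstruct the name.
    """
    ctx = extra.get("benchmarking_context", {})
    name = {
        "name": metric.key,
        "description": extra.get("description"),
        "gpu_description": extra.get("gpu_description"),
        "vllm_version": ctx.get("vllm_version"),
        "python_version": ctx.get("python_version"),
        "torch_version": ctx.get("torch_version"),
    }
    return {"name": json.dumps(name), "unit": metric.unit, "value": metric.value}


# Example with made-up values for a single throughput metric.
extra = {
    "description": "serving benchmark, llama-2-7b",
    "gpu_description": "NVIDIA A100-SXM4-80GB x 1",
    "benchmarking_context": {
        "vllm_version": "0.3.3",
        "python_version": "3.10.12",
        "torch_version": "2.1.2",
    },
}
print(make_gha_record(MetricTemplate("request_throughput", "requests/s", 12.3), extra))
```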
