feat(ci): save benchmark results (#395)
0xNineteen authored Nov 25, 2024
1 parent 19e533c commit 82a2418
Showing 9 changed files with 438 additions and 137 deletions.
85 changes: 77 additions & 8 deletions .github/workflows/check.yml
@@ -89,7 +89,29 @@ jobs:
run: |
python scripts/parse_kcov.py kcov-output/test/coverage.json
gossip:
strategy:
matrix:
os: [ubuntu-latest]
runs-on: ${{matrix.os}}
timeout-minutes: 60
steps:
- name: checkout
uses: actions/checkout@v2
with:
submodules: recursive
- name: setup-zig
uses: mlugg/setup-zig@v1
with:
version: 0.13.0

- name: build release
run: zig build -Doptimize=ReleaseSafe
- name: run gossip
run: bash scripts/gossip_test.sh 120 # in seconds

benchmarks:
if: ${{ github.ref != 'refs/heads/main' }}
strategy:
matrix:
os: [ubuntu-latest]
@@ -104,11 +126,38 @@ jobs:
uses: mlugg/setup-zig@v1
with:
version: 0.13.0

- name: benchmarks
run: zig build -Doptimize=ReleaseSafe benchmark -- all --metrics

# Download previous benchmark result from cache (if exists)
- name: Download previous benchmark data
uses: actions/cache@v4
with:
path: ./cache
key: ${{ runner.os }}-benchmark

# Run `github-action-benchmark` action
- name: Store benchmark result
uses: benchmark-action/github-action-benchmark@v1
with:
# What benchmark tool the output.txt came from
tool: "customSmallerIsBetter"
# Where the output from the benchmark tool is stored
output-file-path: results/output.json
# Where the previous data file is stored
external-data-json-path: ./cache/benchmark-data.json
# Workflow will fail when an alert happens
fail-on-alert: true
# GitHub API token to make a commit comment
github-token: ${{ secrets.GITHUB_TOKEN }}
# Enable alert commit comment
comment-on-alert: true
# Upload the updated cache file for the next job by actions/cache
# only when running on the main branch
save-data-file: false

main_benchmarks:
if: ${{ github.ref == 'refs/heads/main' }}
strategy:
matrix:
os: [ubuntu-latest]
@@ -119,12 +168,32 @@ jobs:
uses: actions/checkout@v2
with:
submodules: recursive
- name: setup zig
uses: mlugg/setup-zig@v1
with:
version: 0.13.0
- name: benchmarks
run: zig build -Doptimize=ReleaseSafe benchmark -- all --metrics

# Download previous benchmark result from cache (if exists)
- name: Download previous benchmark data
uses: actions/cache@v4
with:
path: ./cache
key: ${{ runner.os }}-benchmark

# Run `github-action-benchmark` action
- name: Store benchmark result
uses: benchmark-action/github-action-benchmark@v1
with:
# What benchmark tool the output.txt came from
tool: "customSmallerIsBetter"
# Where the output from the benchmark tool is stored
output-file-path: results/output.json
# Where the previous data file is stored
external-data-json-path: ./cache/benchmark-data.json
# Workflow will fail when an alert happens
fail-on-alert: true
# Upload the updated cache file for the next job by actions/cache
# only when running on the main branch (see if:)
save-data-file: true
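
For reference, the `customSmallerIsBetter` tool configured above consumes `results/output.json` as a flat JSON array of `{name, unit, value}` entries, where smaller values read as better. A minimal sketch of that schema (the entry shown is made up, not real benchmark output):

```python
import json

# Illustrative sketch of the input schema github-action-benchmark's
# "customSmallerIsBetter" tool reads from results/output.json:
# an array of entries, each with a benchmark name, a unit string,
# and a numeric value where smaller means better. Values are made up.
sample = [
    {"name": "readWriteAccounts(100k accounts)", "unit": "ns", "value": 162868245},
]

text = json.dumps(sample, indent=2)
print(text)
```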
18 changes: 12 additions & 6 deletions docs/benchmarks.md
@@ -1,21 +1,21 @@
# benchmarks

- run all benchmarks: `./zig-out/bin/benchmark`
- filter specific cases: `./zig-out/bin/benchmark accounts_db_readwrite`
- benchmark results are written to csv in `results/`
- this includes the average stats and the raw runtimes

### dev note

if you want to support multiple return values, you need to include BenchTimeUnits as the first parameter
so the benchmark runner knows which time unit to expect.

### example output

#### average stats
```
benchmark, read_time_min, read_time_max, read_time_mean, read_time_variance, benchmark, write_time_min, write_time_max, write_time_mean, write_time_variance,
readWriteAccounts(100k accounts (1_slot - ram index - ram accounts)), 172156041, 158767959, 162868245, 15183799545214, 303852750, 286908417, 292925858, 39820330697776,
readWriteAccounts(100k accounts (1_slot - disk index - ram accounts)), 165480250, 156170500, 160821658, 7611019088428, 319935833, 286708833, 304248199, 113169780175088,
```

@@ -47,4 +47,10 @@ python scripts/view_bench.py readWriteAccounts_runtimes.csv readWriteAccounts_ru

![example_benchmark_viz](imgs/bench_eg.png)
- each point on y-axis=0 is a runtime
- the point at y-axis=1 is the mean; the bar around it is the standard deviation

# tracking benchmarks over time

two main scripts are used:
- `scripts/collect_benchmarks.sh` is periodically called using a cron job to run the benchmarks on new git commits
- `scripts/benchmark_server.py` is run as a server to visualize the results over time
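
the naming convention that links the two scripts can be sketched as follows (the file name below is a hypothetical example):

```python
# collect_benchmarks.sh writes results/metrics/output-<git_commit>-<unix_timestamp>.json;
# benchmark_server.py recovers both fields back out of the file name.
file_name = "output-82a2418-1732500000.json"  # hypothetical example
commit = file_name.split("-")[1]
timestamp = int(file_name.split("-")[2].split(".")[0])
print(commit, timestamp)  # → 82a2418 1732500000
```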
73 changes: 73 additions & 0 deletions scripts/benchmark_server.py
@@ -0,0 +1,73 @@
import os
import json
import plotly.express as px
from dash import Dash, html, dcc, callback, Output, Input

cached_timestamp = None
cached_layout = None
path = "results/metrics/"

def server_layout():
    global cached_timestamp, cached_layout
layout = [
html.H1(children='Sig: Benchmarks', style={'textAlign':'center'}),
]

# hash all the files together
latest_timestamp = 0
for file_path in os.listdir(path):
# results/metrics/output-$git_commit-$timestamp.json
# parse file name
timestamp = int(file_path.split("-")[2].split(".")[0])
latest_timestamp = max(latest_timestamp, timestamp)

if latest_timestamp == cached_timestamp:
return cached_layout

# for each file in the directory
all_metrics = {}
for file_path in os.listdir(path):
# results/metrics/output-$git_commit-$timestamp.json
# parse file_path name
commit = file_path.split("-")[1]
timestamp = file_path.split("-")[2].split(".")[0]

commit_metrics = json.load(open(path + file_path))
for metric in commit_metrics:
metric["timestamp"] = timestamp
metric["commit"] = commit
key = metric["name"]
if key in all_metrics:
all_metrics[key].append(metric)
else:
all_metrics[key] = [metric]

for key in all_metrics:
data = all_metrics[key]
if len(data) == 0: continue
title = data[0]["name"]
fig = px.line(
x=[d['timestamp'] for d in data],
y=[d['value'] for d in data],
title=title,
hover_data={"commit": [d['commit'] for d in data]},
markers=True,
)
fig.update_layout(
xaxis_title="Timestamp",
yaxis_title=data[0]["unit"],
)
layout.append(dcc.Graph(figure=fig))

    cached_timestamp = latest_timestamp
    cached_layout = layout
return layout


app = Dash()
# re-runs on each page refresh
app.layout = server_layout

if __name__ == '__main__':
if not os.path.exists(path):
os.makedirs(path)

app.run(debug=True, host='0.0.0.0')
19 changes: 19 additions & 0 deletions scripts/collect_benchmarks.sh
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
set -e

# pull the latest changes
git pull

git_commit=$(git rev-parse HEAD)
timestamp=$(date +%s)
result_dir="results/metrics"
result_file="${result_dir}/output-${git_commit}-*.json"

if ls $result_file 1> /dev/null 2>&1; then
echo "Results for commit $git_commit already exist. Skipping benchmark."
else
# Run the benchmark only if the result file doesn't exist
zig build -Doptimize=ReleaseSafe -Dno-run benchmark
./zig-out/bin/benchmark --metrics all

mkdir -p "$result_dir"
mv results/output.json "${result_dir}/output-${git_commit}-${timestamp}.json"
echo "Benchmark results saved to ${result_dir}/output-${git_commit}-${timestamp}.json"
fi
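
the docs above note this script is driven by a cron job; one hypothetical crontab entry (the repository path and the 30-minute cadence are assumptions, not part of this commit):

```
*/30 * * * * cd /path/to/sig && bash scripts/collect_benchmarks.sh >> collect.log 2>&1
```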