From 2fa9569dd9e35eaf9dc843889a1fce15d1a9dfc3 Mon Sep 17 00:00:00 2001
From: Songki Choi
Date: Fri, 5 Jan 2024 15:08:37 +0900
Subject: [PATCH] Add performance benchmark github action workflow (#2762)

---
 .github/workflows/perf.yml             | 79 ++++++++++++++++++++++++++
 .github/workflows/run_tests_in_tox.yml |  1 +
 tests/perf/conftest.py                 | 18 ++++--
 3 files changed, 93 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/perf.yml

diff --git a/.github/workflows/perf.yml b/.github/workflows/perf.yml
new file mode 100644
index 00000000000..7e494e86e80
--- /dev/null
+++ b/.github/workflows/perf.yml
@@ -0,0 +1,79 @@
+name: Performance Benchmark Test
+
+on:
+  workflow_dispatch: # run on request (no need for PR)
+    inputs:
+      benchmark-type:
+        type: choice
+        description: Benchmark type
+        options:
+          - accuracy
+          - speed
+        required: true
+      model-type:
+        type: choice
+        description: Model type to run benchmark
+        options:
+          - default # speed, balance, accuracy models only
+          - all # default + other models
+        default: all
+      data-size:
+        type: choice
+        description: Dataset size to run benchmark
+        options:
+          - small
+          - medium
+          - large
+          - all
+        default: all
+      num-repeat:
+        description: Overrides the default per-data-size number of repeats
+        default: 0
+      num-epoch:
+        description: Overrides the default per-model number of epochs
+        default: 0
+      eval-upto:
+        type: choice
+        description: The last operation to evaluate. 'optimize' means all.
+        options:
+          - train
+          - export
+          - optimize
+        default: train
+
+jobs:
+  Regression-Tests:
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - toxenv_task: "iseg"
+            task: "instance_segmentation"
+          - toxenv_task: "seg"
+            task: "semantic_segmentation"
+          - toxenv_task: "det"
+            task: "detection"
+          - toxenv_task: "ano"
+            task: "anomaly"
+          - toxenv_task: "cls"
+            task: "classification"
+    name: Perf-Test-py310-${{ matrix.toxenv_task }}
+    uses: ./.github/workflows/run_tests_in_tox.yml
+    with:
+      python-version: "3.10"
+      toxenv-pyver: "py310"
+      toxenv-task: ${{ matrix.toxenv_task }}
+      tests-dir: >
+        tests/perf/test_${{ matrix.task }}.py
+        -k ${{ inputs.benchmark-type }}
+        --model-type ${{ inputs.model-type }}
+        --data-root /home/validation/data/new/
+        --data-size ${{ inputs.data-size }}
+        --num-repeat ${{ inputs.num-repeat }}
+        --num-epoch ${{ inputs.num-epoch }}
+        --summary-csv .tox/perf-${{ inputs.benchmark-type }}-benchmark-${{ matrix.toxenv_task }}.csv
+      runs-on: "['self-hosted', 'Linux', 'X64', 'dmount']"
+      task: ${{ matrix.task }}
+      timeout-minutes: 8640
+      upload-artifact: true
+      artifact-prefix: perf-${{ inputs.benchmark-type }}-benchmark
diff --git a/.github/workflows/run_tests_in_tox.yml b/.github/workflows/run_tests_in_tox.yml
index df3829d6d71..40556df609d 100644
--- a/.github/workflows/run_tests_in_tox.yml
+++ b/.github/workflows/run_tests_in_tox.yml
@@ -59,5 +59,6 @@ jobs:
           path: |
             .tox/tests-${{ inputs.toxenv-task }}-${{ inputs.toxenv-pyver }}-${{ inputs.toxenv-ptver }}.csv
             .tox/tests-reg_${{ inputs.task }}*.csv
+            .tox/perf-*.csv
         # Use always() to always run this step to publish test results when there are test failures
         if: ${{ inputs.upload-artifact && always() }}
diff --git a/tests/perf/conftest.py b/tests/perf/conftest.py
index 0d831d50dd1..d4be1d7e5ef 100644
--- a/tests/perf/conftest.py
+++ b/tests/perf/conftest.py
@@ -61,8 +61,12 @@ def pytest_addoption(parser):
     parser.addoption(
         "--output-root",
         action="store",
-        default="exp/perf",
-        help="Output root directory.",
+        help="Output root directory. Defaults to temp directory.",
+    )
+    parser.addoption(
+        "--summary-csv",
+        action="store",
+        help="Path to output summary csv file. Defaults to {output-root}/benchmark-summary.csv",
     )
     parser.addoption(
         "--dry-run",
@@ -73,9 +77,11 @@


 @pytest.fixture(scope="session")
-def fxt_output_root(request: pytest.FixtureRequest) -> Path:
+def fxt_output_root(request: pytest.FixtureRequest, tmp_path_factory: pytest.TempPathFactory) -> Path:
     """Output root + date + short commit hash."""
     output_root = request.config.getoption("--output-root")
+    if output_root is None:
+        output_root = tmp_path_factory.mktemp("otx-benchmark")
     data_str = datetime.now().strftime("%Y%m%d-%H%M%S")
     commit_str = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode("ascii").strip()
     return Path(output_root) / (data_str + "-" + commit_str)
@@ -134,13 +140,15 @@ def fxt_benchmark(request: pytest.FixtureRequest, fxt_output_root: Path) -> OTXB


 @pytest.fixture(scope="session", autouse=True)
-def fxt_benchmark_summary(fxt_output_root: Path):
+def fxt_benchmark_summary(request: pytest.FixtureRequest, fxt_output_root: Path):
     """Summarize all results at the end of test session."""
     yield
     all_results = OTXBenchmark.load_result(fxt_output_root)
     if all_results is not None:
         print("=" * 20, "[Benchmark summary]")
         print(all_results)
-        output_path = fxt_output_root / "benchmark-summary.csv"
+        output_path = request.config.getoption("--summary-csv")
+        if not output_path:
+            output_path = fxt_output_root / "benchmark-summary.csv"
         all_results.to_csv(output_path, index=False)
         print(f" -> Saved to {output_path}.")
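
Usage note (outside the diff): perf.yml is workflow_dispatch-only, so it has to be triggered manually with the inputs above. For a quick local sanity check of the new --summary-csv fallback added to fxt_benchmark_summary, the standalone Python sketch below mirrors that option-resolution logic; resolve_summary_csv is a hypothetical helper used for illustration only and is not part of this patch or the codebase.

    # Illustrative sketch of the summary CSV path resolution shown above:
    # an explicit --summary-csv wins, otherwise the summary is written to
    # {output-root}/benchmark-summary.csv under the session output root.
    from pathlib import Path
    from typing import Optional


    def resolve_summary_csv(summary_csv_opt: Optional[str], output_root: Path) -> Path:
        """Hypothetical helper mirroring the fixture's fallback logic."""
        if summary_csv_opt:
            return Path(summary_csv_opt)
        return output_root / "benchmark-summary.csv"


    if __name__ == "__main__":
        # CI case: the workflow passes --summary-csv, so that artifact path is used as-is.
        print(resolve_summary_csv(".tox/perf-accuracy-benchmark-det.csv", Path("exp/perf")))
        # Local case: no --summary-csv, so the file lands under the dated output root.
        print(resolve_summary_csv(None, Path("exp/perf/20240105-120000-2fa9569")))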