Sync <- Mlperf inference #557

Merged: 224 commits, Nov 19, 2024

Commits
e7e915b
bug fix
anandhu-eng Nov 8, 2024
c7054a4
incorporated whoami to take the username
anandhu-eng Nov 8, 2024
74966b9
Merge pull request #508 from mlcommons/anandhu-eng-patch-1
arjunsuresh Nov 8, 2024
4461eba
Fix nvidia gptj model suffix
arjunsuresh Nov 8, 2024
ddd24af
Merge branch 'mlcommons:mlperf-inference' into mlperf-inference
arjunsuresh Nov 8, 2024
db91160
Merge pull request #509 from GATEOverflow/mlperf-inference
arjunsuresh Nov 8, 2024
0f6306e
Update test-mlperf-inference-mixtral.yml
arjunsuresh Nov 8, 2024
a24b7a8
Update test-mlperf-inference-llama2.yml
arjunsuresh Nov 8, 2024
7844605
Update test-mlperf-inference-mixtral.yml
arjunsuresh Nov 9, 2024
0f7eda4
Update test-mlperf-inference-llama2.yml
arjunsuresh Nov 9, 2024
98e8c39
Merge branch 'mlcommons:mlperf-inference' into mlperf-inference
arjunsuresh Nov 9, 2024
6883f0a
Update test-intel-mlperf-inference-implementations.yml
arjunsuresh Nov 9, 2024
33ef154
Update test-amd-mlperf-inference-implementations.yml
arjunsuresh Nov 9, 2024
5cdef0d
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 9, 2024
15dc7d0
Update test-mlperf-inference-llama2.yml
arjunsuresh Nov 9, 2024
bd97501
Update test-mlperf-inference-mixtral.yml
arjunsuresh Nov 9, 2024
495365e
Update test-amd-mlperf-inference-implementations.yml
arjunsuresh Nov 9, 2024
c0499d0
privileged mode set to on default
anandhu-eng Nov 9, 2024
fac6dff
add privileged mode option to run command
anandhu-eng Nov 9, 2024
d47b8ca
Added docker deps for openimages for non nvidia implementations too
arjunsuresh Nov 9, 2024
4e03464
Removed generic-sys-util deps for openimages
arjunsuresh Nov 9, 2024
17c5430
Skip system package installs when sudo not available
arjunsuresh Nov 9, 2024
da6af8c
Merge pull request #512 from GATEOverflow/mlperf-inference
arjunsuresh Nov 9, 2024
159a1fc
Added an option to skip sudo passwd
arjunsuresh Nov 10, 2024
e7d4e2f
Bug fix for skipping deps - non root users
anandhu-eng Nov 10, 2024
859c805
fix for non interactive terminal
anandhu-eng Nov 10, 2024
12a710d
Update _cm.yaml
anandhu-eng Nov 10, 2024
ce71d8b
updation for docker privileged mode
anandhu-eng Nov 10, 2024
7d1f5bc
Merge pull request #514 from mlcommons/issue-#510
arjunsuresh Nov 10, 2024
e762efc
Merge pull request #511 from mlcommons/anandhu-eng-patch-1
arjunsuresh Nov 10, 2024
79856fd
Allow default version update from variations
arjunsuresh Nov 10, 2024
e9f8a17
Merge branch 'mlcommons:mlperf-inference' into mlperf-inference
arjunsuresh Nov 10, 2024
8e81b5b
Merge pull request #515 from GATEOverflow/mlperf-inference
arjunsuresh Nov 10, 2024
1feab22
Update test-amd-mlperf-inference-implementations.yml
arjunsuresh Nov 10, 2024
2c2f003
Update test-intel-mlperf-inference-implementations.yml
arjunsuresh Nov 10, 2024
3fba880
Added configs for IntelSPR.24c
arjunsuresh Nov 10, 2024
cea03d7
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 10, 2024
74ff3f7
Update test-scc24-sdxl.yaml
arjunsuresh Nov 10, 2024
ddcd25a
Update test-mlperf-inference-llama2.yml
arjunsuresh Nov 10, 2024
9384c34
Update test-mlperf-inference-mixtral.yml
arjunsuresh Nov 10, 2024
dc9c828
Added enable_env_if_env, removed fixed branch for scc24
arjunsuresh Nov 10, 2024
03b59f1
Merge branch 'mlcommons:mlperf-inference' into mlperf-inference
arjunsuresh Nov 10, 2024
7ba8a1f
Added enable_env_if_env, removed fixed branch for scc24
arjunsuresh Nov 10, 2024
e823583
Merge pull request #518 from GATEOverflow/mlperf-inference
arjunsuresh Nov 10, 2024
5195d30
Removed dev branch for SDXL
arjunsuresh Nov 10, 2024
78bd68d
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 10, 2024
63482f9
Merge pull request #519 from GATEOverflow/mlperf-inference
arjunsuresh Nov 10, 2024
98824e9
update_env_if_env -> update_meta_if_env
arjunsuresh Nov 10, 2024
91c16ef
Merge pull request #520 from GATEOverflow/mlperf-inference
arjunsuresh Nov 10, 2024
f8dd8d3
commit against issue https://github.com/mlcommons/cm4mlops/issues/522
anandhu-eng Nov 11, 2024
cca9f1d
fix typo
anandhu-eng Nov 11, 2024
1d2cf20
Added test case 8
anandhu-eng Nov 11, 2024
6517741
Added cuda dependency(conditional)
anandhu-eng Nov 11, 2024
7f63872
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 11, 2024
13dcd9e
pass cuda version also
anandhu-eng Nov 11, 2024
f527ca0
Add cuda version to sut meta
anandhu-eng Nov 11, 2024
22a1d23
reverted cuda dependency
anandhu-eng Nov 11, 2024
8dd6104
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 11, 2024
5fa577d
info update
anandhu-eng Nov 11, 2024
ad56488
Merge pull request #523 from mlcommons/submisison-generation-fix
arjunsuresh Nov 11, 2024
7799261
Merge branch 'mlperf-inference' into anandhu-eng-patch-1
arjunsuresh Nov 11, 2024
be89f5f
Merge branch 'mlperf-inference' into issue#525
anandhu-eng Nov 11, 2024
7ab3736
Merge pull request #524 from mlcommons/anandhu-eng-patch-1
arjunsuresh Nov 11, 2024
3fd9fee
Merge pull request #526 from mlcommons/issue#525
arjunsuresh Nov 11, 2024
e80d4ab
Enabled --total-sample-count option in performance run for llama2 and…
arjunsuresh Nov 11, 2024
07b7fe6
Update test-mlperf-inference-llama2.yml
arjunsuresh Nov 11, 2024
c437b1b
Update test-mlperf-inference-mixtral.yml
arjunsuresh Nov 11, 2024
e0b6c3e
Update test-mlperf-inference-llama2.yml
arjunsuresh Nov 11, 2024
10eb033
Update test-mlperf-inference-llama2.yml
arjunsuresh Nov 11, 2024
aaddb2a
Update test-mlperf-inference-mixtral.yml
arjunsuresh Nov 11, 2024
e783d4f
Merge branch 'mlcommons:mlperf-inference' into mlperf-inference
arjunsuresh Nov 11, 2024
8cec2a6
Added github runner CM script
arjunsuresh Nov 11, 2024
899f9ab
Fix gh runner remove command
arjunsuresh Nov 11, 2024
1934189
Fix dockerfile_env
arjunsuresh Nov 11, 2024
28627a3
Merge pull request #527 from GATEOverflow/mlperf-inference
arjunsuresh Nov 11, 2024
aab34ac
Fix docker build_deps
arjunsuresh Nov 11, 2024
61c9b7d
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 11, 2024
ae38e99
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 11, 2024
e17ece4
Merge branch 'mlcommons:mlperf-inference' into mlperf-inference
arjunsuresh Nov 12, 2024
94602ff
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 12, 2024
41eb252
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 12, 2024
eeba8ff
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 12, 2024
1de3d37
Added detect for nvidia-docker
arjunsuresh Nov 12, 2024
d2d3319
Fix scenario mapping call in mlperf utils
arjunsuresh Nov 12, 2024
9d2bf09
Fix MLPerf inference measurements readme update
arjunsuresh Nov 12, 2024
fbc74fa
Merge branch 'mlcommons:mlperf-inference' into mlperf-inference
arjunsuresh Nov 12, 2024
d4d3cca
Merge pull request #529 from GATEOverflow/mlperf-inference
arjunsuresh Nov 12, 2024
dacb33b
Update requirements.txt
arjunsuresh Nov 12, 2024
ffc8b32
Update setup.py
arjunsuresh Nov 12, 2024
0f07093
Increment version to 0.3.26
arjunsuresh Nov 12, 2024
4dcd13a
Avoid creation of empty model_mapping.json
arjunsuresh Nov 12, 2024
70e0fe5
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 12, 2024
b8a91f1
Merge pull request #530 from GATEOverflow/mlperf-inference
arjunsuresh Nov 12, 2024
b5db377
enable user to submit a result for both closed and open
anandhu-eng Nov 13, 2024
0ff94e0
fix typo
anandhu-eng Nov 13, 2024
16bc227
test commit - enable open division
anandhu-eng Nov 13, 2024
3f63303
added open-closed option under division
anandhu-eng Nov 13, 2024
9a312f6
use the default sut folder name supplied if cm-sut-json is not there …
anandhu-eng Nov 13, 2024
800df7b
Fixes for --docker_pss_user_group
arjunsuresh Nov 13, 2024
d05f261
Update test-mlperf-inference-llama2.yml
arjunsuresh Nov 13, 2024
4be1139
set pytorch as default backend
anandhu-eng Nov 13, 2024
aaa7b5c
revert commit https://github.com/mlcommons/cm4mlops/pull/537/commits/…
anandhu-eng Nov 13, 2024
cf30e25
set pytorch as default framework
anandhu-eng Nov 13, 2024
f73eb6e
Enable pass_user_group option for docker run
arjunsuresh Nov 13, 2024
0e5e92d
By default make MLPerf inference results and submissions dir shared
arjunsuresh Nov 13, 2024
3fe15c0
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 13, 2024
bbdb49d
By default make MLPerf inference results and submissions dir shared
arjunsuresh Nov 13, 2024
7869cc3
Increment the default mlperf inference version in user conf
arjunsuresh Nov 13, 2024
eca729e
Merge pull request #539 from GATEOverflow/mlperf-inference
arjunsuresh Nov 13, 2024
cdffedb
Added default_version=master for get-mlperf-inference-src
arjunsuresh Nov 13, 2024
a240b9d
Fix mixtral starting weights filename, load measurements.json for mlp…
arjunsuresh Nov 13, 2024
b517987
Added mlperf-inference-reference-mixtral image name
arjunsuresh Nov 13, 2024
bd42b84
Update test-mlperf-inference-mixtral.yml
arjunsuresh Nov 13, 2024
e64cfbc
Update test-mlperf-inference-mixtral.yml
arjunsuresh Nov 13, 2024
d5ad232
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 13, 2024
fa7c35a
Update test-mlperf-inference-sdxl.yaml
arjunsuresh Nov 14, 2024
d369373
Fix default_version update in variations
arjunsuresh Nov 14, 2024
f4948dd
Merge pull request #540 from GATEOverflow/mlperf-inference
arjunsuresh Nov 14, 2024
49f5099
Rearranged the logic of detect-sudo
arjunsuresh Nov 14, 2024
c45e3f4
Update test-mlperf-inference-sdxl.yaml
arjunsuresh Nov 14, 2024
5acaecb
Update test-scc24-sdxl.yaml
arjunsuresh Nov 14, 2024
555d6b8
Update test-scc24-sdxl.yaml
arjunsuresh Nov 14, 2024
4f98c07
Update test-scc24-sdxl.yaml
arjunsuresh Nov 14, 2024
fbac609
Update test-mlperf-inference-llama2.yml
arjunsuresh Nov 14, 2024
dfbf6c2
Update test-scc24-sdxl.yaml
arjunsuresh Nov 14, 2024
6fb71ff
Merge pull request #548 from GATEOverflow/mlperf-inference
arjunsuresh Nov 15, 2024
b50056e
updation for test data generation
anandhu-eng Nov 15, 2024
5a46d9a
add pandas deps
anandhu-eng Nov 15, 2024
4be8ceb
Add test data generated path
anandhu-eng Nov 15, 2024
88e416b
Create run.sh
anandhu-eng Nov 15, 2024
caa03ea
add python3 as deps
anandhu-eng Nov 15, 2024
a7e0bf9
add python script to extract test dataset
anandhu-eng Nov 15, 2024
4e63115
add samples input args
anandhu-eng Nov 15, 2024
2dc178e
set default output path
anandhu-eng Nov 15, 2024
010a539
included file name
anandhu-eng Nov 15, 2024
bb4464c
update default out path
anandhu-eng Nov 15, 2024
4be54b3
update env
anandhu-eng Nov 15, 2024
3e11dd1
Add base variation
anandhu-eng Nov 15, 2024
b31e885
os package imported
anandhu-eng Nov 15, 2024
4a0826a
fix typo
anandhu-eng Nov 15, 2024
0294ec3
Remove inference cache from mlperf inference docker
arjunsuresh Nov 15, 2024
82f18f4
Added an option to call preprocess script via the submission checker
arjunsuresh Nov 15, 2024
a30dea7
Nvidia gh action update
arjunsuresh Nov 15, 2024
fc53e03
Added RTX4090x1 configs
arjunsuresh Nov 15, 2024
636f49e
Merge pull request #550 from mlcommons/mixtral-test-dataset
arjunsuresh Nov 15, 2024
6da3acb
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 15, 2024
f260610
Merge pull request #551 from GATEOverflow/mlperf-inference
arjunsuresh Nov 15, 2024
fed53ef
Fix skip_if_any for llama2 and mixtral
arjunsuresh Nov 15, 2024
74c7fa0
change in variation name
anandhu-eng Nov 15, 2024
54025b0
Merge pull request #552 from mlcommons/mixtral-test-dataset
arjunsuresh Nov 15, 2024
b50fff2
Update test-intel-mlperf-inference-implementations.yml
arjunsuresh Nov 15, 2024
a3c8ae3
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 15, 2024
32d3e5d
Merge pull request #553 from GATEOverflow/mlperf-inference
arjunsuresh Nov 15, 2024
b6ba934
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 15, 2024
f81ffe1
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 15, 2024
5cd00e8
Update test-scc24-sdxl.yaml
arjunsuresh Nov 15, 2024
413c966
Support min_query_count and max_query_count for mlperf inference
arjunsuresh Nov 15, 2024
0067dcf
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 15, 2024
a602261
Merge pull request #554 from GATEOverflow/mlperf-inference
arjunsuresh Nov 15, 2024
d5132cd
Remove TEST05
arjunsuresh Nov 15, 2024
2f58688
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 15, 2024
11b3be8
Merge pull request #555 from GATEOverflow/mlperf-inference
arjunsuresh Nov 15, 2024
31eda56
Fix CM_ML_MODEL_PATH export for mixtral
arjunsuresh Nov 16, 2024
18b1d06
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 16, 2024
155cf61
Merge pull request #556 from GATEOverflow/mlperf-inference
arjunsuresh Nov 16, 2024
63d8288
Update test-scc24-sdxl.yaml
arjunsuresh Nov 16, 2024
259a159
Update test-scc24-sdxl.yaml
arjunsuresh Nov 16, 2024
571ae2b
Fix the saving of console logs
arjunsuresh Nov 16, 2024
4081528
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 16, 2024
e456b44
Merge pull request #558 from GATEOverflow/mlperf-inference
arjunsuresh Nov 16, 2024
8f47e15
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 16, 2024
b15df20
Update test-scc24-sdxl.yaml
arjunsuresh Nov 16, 2024
4b3d2ee
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 16, 2024
90c3454
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 16, 2024
5c6dd4e
Fix 3d-unet SS latency in configs
arjunsuresh Nov 16, 2024
17afcd8
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 16, 2024
79fe248
Merge pull request #559 from GATEOverflow/mlperf-inference
arjunsuresh Nov 16, 2024
10788d8
Added sympy dependency for nvidia mlperf inference gptj
arjunsuresh Nov 16, 2024
a51f736
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 16, 2024
7d34691
Merge pull request #560 from GATEOverflow/mlperf-inference
arjunsuresh Nov 16, 2024
58d9b01
Skip docker run command for AMD MLPerf inference gh action
arjunsuresh Nov 16, 2024
6c457dc
Update test-amd-mlperf-inference-implementations.yml
arjunsuresh Nov 16, 2024
6d10ea6
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 16, 2024
a1492c2
Fixes rtx4090x1 configs
arjunsuresh Nov 16, 2024
7031377
Added docker image names for nvidia mlperf inference llm
arjunsuresh Nov 16, 2024
ea8ac4b
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 16, 2024
8e75c89
Merge pull request #562 from GATEOverflow/mlperf-inference
arjunsuresh Nov 16, 2024
fe4d73a
Remove TEST05 in mlperf inference
arjunsuresh Nov 16, 2024
f45728b
Fix retinanet nvidia mlperf inference config
arjunsuresh Nov 16, 2024
dfc46a9
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 16, 2024
f7b24d0
Build TRTLLM for Nvidia MLPerf inference LLM models
arjunsuresh Nov 17, 2024
a5cd676
Update test-amd-mlperf-inference-implementations.yml
arjunsuresh Nov 17, 2024
ef16db8
Update test-amd-mlperf-inference-implementations.yml
arjunsuresh Nov 17, 2024
cc4ad57
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 17, 2024
a91cc3b
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 17, 2024
852b297
Merge pull request #563 from GATEOverflow/mlperf-inference
arjunsuresh Nov 17, 2024
6882ba3
Update default-config.yaml
arjunsuresh Nov 17, 2024
fb4339c
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 17, 2024
636343e
Merge pull request #564 from GATEOverflow/mlperf-inference
arjunsuresh Nov 17, 2024
885e65e
adjust version detection for multi digit version numbers
Submandarine Nov 17, 2024
03a6cea
adjust version detection for multi digit version numbers
Submandarine Nov 17, 2024
f72ffc1
Merge pull request #565 from FAU-cet/mlperf-inference
arjunsuresh Nov 17, 2024
a9ae411
Fixes #566, type error for query counts
arjunsuresh Nov 18, 2024
8b42ced
Merge branch 'mlperf-inference' into mlperf-inference
arjunsuresh Nov 18, 2024
88e9a0d
Fixes #566, type error for query counts
arjunsuresh Nov 18, 2024
9bdfff5
Merge pull request #567 from GATEOverflow/mlperf-inference
arjunsuresh Nov 18, 2024
47055ec
rename mixtral dataset download script
anandhu-eng Nov 18, 2024
6a81cd3
add gnn dataset download script
anandhu-eng Nov 18, 2024
80ee9fa
dlt readme
anandhu-eng Nov 18, 2024
d463753
Merge pull request #570 from mlcommons/gnn-dataset
arjunsuresh Nov 18, 2024
b32ded2
Merge pull request #569 from mlcommons/mixtral-test-dataset
arjunsuresh Nov 18, 2024
7c1cdde
Update test-nvidia-mlperf-inference-implementations.yml
arjunsuresh Nov 18, 2024
173648e
Added draw-graph-from-json-data CM script
arjunsuresh Nov 18, 2024
aae739a
Merge branch 'mlperf-inference' into issue-#536
anandhu-eng Nov 19, 2024
8ec55f5
closed-open added as an option
anandhu-eng Nov 19, 2024
d785cc0
changed to closed-open
anandhu-eng Nov 19, 2024
0b58485
Merge pull request #535 from mlcommons/issue-#403
arjunsuresh Nov 19, 2024
6d5b8dd
Merge pull request #537 from mlcommons/issue-#536
arjunsuresh Nov 19, 2024
cbcbb46
Merge branch 'mlcommons:mlperf-inference' into mlperf-inference
arjunsuresh Nov 19, 2024
e91542b
Fix diffusers version for Nvidia mlperf inference sdxl
arjunsuresh Nov 19, 2024
2d0281e
Fixes #171, added dependency graph including mermaid
arjunsuresh Nov 18, 2024
09f0cdb
Fixes #171, only dump the graph for now
arjunsuresh Nov 19, 2024
8312e33
Merge pull request #573 from GATEOverflow/mlperf-inference
arjunsuresh Nov 19, 2024
3a60122
Merge branch 'main' into mlperf-inference
arjunsuresh Nov 19, 2024
Files changed
.github/workflows/test-amd-mlperf-inference-implementations.yml
@@ -2,11 +2,11 @@ name: MLPerf Inference AMD implementations
 
 on:
   schedule:
-    - cron: "29 4 * * *" #to be adjusted
+    - cron: "46 11 * * *" #to be adjusted
 
 jobs:
-  build_nvidia:
-    if: github.repository_owner == 'gateoverflow'
+  run_amd:
+    if: github.repository_owner == 'gateoverflow_off'
     runs-on: [ self-hosted, linux, x64, GO-spr ]
     strategy:
       fail-fast: false
@@ -16,11 +16,11 @@ jobs:
     steps:
       - name: Test MLPerf Inference AMD (build only) ${{ matrix.model }}
         run: |
-          if [ -f "gh_action_conda/bin/deactivate" ]; then source gh_action_conda/bin/deactivate; fi
-          python3 -m venv gh_action_conda
-          source gh_action_conda/bin/activate
+          if [ -f "gh_action/bin/deactivate" ]; then source gh_action/bin/deactivate; fi
+          python3 -m venv gh_action
+          source gh_action/bin/activate
           export CM_REPOS=$HOME/GH_CM
           pip install --upgrade cm4mlops
           pip install tabulate
-          cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet
+          cm pull repo
+          cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes
           # cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c
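Note: the schedule edits in this and the following workflow files only move the cron trigger around. For reference, a GitHub Actions cron expression has five fields (minute, hour, day-of-month, month, day-of-week) evaluated in UTC; a minimal sketch reusing two values from the hunks in this PR:

    on:
      schedule:
        # minute hour day-of-month month day-of-week (UTC)
        - cron: "46 11 * * *"  # every day at 11:46 UTC
        - cron: "29 1 * * 4"   # Thursdays at 01:29 UTC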
8 changes: 5 additions & 3 deletions .github/workflows/test-cm-based-submission-generation.yml
@@ -17,14 +17,13 @@ jobs:
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
         python-version: [ "3.12" ]
-        division: ["closed", "open"]
+        division: ["closed", "open", "closed-open"]
         category: ["datacenter", "edge"]
-        case: ["case-3", "case-7"]
+        case: ["case-3", "case-7", "case-8"]
         action: ["run", "docker"]
         exclude:
           - os: macos-latest
           - os: windows-latest
-          - division: "open"
           - category: "edge"
     steps:
       - uses: actions/checkout@v4
@@ -47,6 +46,9 @@ jobs:
           elif [ "${{ matrix.case }}" == "case-7" ]; then
             #results_dir="submission_generation_tests/case-7/"
             description="Submission generation (sut_info.json incomplete, SUT folder name in required format)"
+          elif [ "${{ matrix.case }}" == "case-8" ]; then
+            #results_dir="submission_generation_tests/case-8/"
+            description="Submission generation (system_meta.json not found in results folder)"
           fi
           # Dynamically set the log group to simulate a dynamic step name
           echo "::group::$description"
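Note: the case-8 branch added above feeds its description into a dynamically named, collapsible log group. "::group::" and "::endgroup::" are standard GitHub Actions workflow commands; a minimal sketch of the pattern (description text illustrative):

    description="Submission generation (case-8)"
    echo "::group::$description"
    # ... commands whose output collapses under that heading ...
    echo "::endgroup::"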
.github/workflows/test-intel-mlperf-inference-implementations.yml
@@ -2,10 +2,10 @@ name: MLPerf Inference Intel implementations
 
 on:
   schedule:
-    - cron: "29 1 * * *" #to be adjusted
+    - cron: "29 1 * * 4" #to be adjusted
 
 jobs:
-  build_nvidia:
+  run_intel:
     if: github.repository_owner == 'gateoverflow'
     runs-on: [ self-hosted, linux, x64, GO-spr ]
     strategy:
@@ -22,5 +22,5 @@ jobs:
           export CM_REPOS=$HOME/GH_CM
           pip install --upgrade cm4mlops
           pip install tabulate
-          cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=intel --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cpu --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet
+          cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=intel --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cpu --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet
           cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c
11 changes: 6 additions & 5 deletions .github/workflows/test-mlperf-inference-llama2.yml
@@ -5,7 +5,7 @@ name: MLPerf inference LLAMA 2 70B
 
 on:
   schedule:
-    - cron: "30 2 * * 4"
+    - cron: "59 04 * * *"
 
 jobs:
   build_reference:
@@ -17,9 +17,10 @@ jobs:
         python-version: [ "3.12" ]
         backend: [ "pytorch" ]
         device: [ "cpu" ]
+        precision: [ "bfloat16" ]
 
     steps:
-      - name: Install dependencies
+      - name: Test MLPerf Inference LLAMA 2 70B reference implementation
         run: |
           source gh_action/bin/deactivate || python3 -m venv gh_action
           source gh_action/bin/activate
@@ -28,7 +29,7 @@ jobs:
           pip install tabulate
           cm pull repo
           pip install "huggingface_hub[cli]"
+          git config --global credential.helper store
+          huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential
-      - name: Test MLPerf Inference LLAMA 2 70B reference implementation
-        run: |
-          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --adr.inference-src.tags=_repo.https://github.com/anandhu-eng/inference.git --clean
+          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean
           cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions
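Note: the two added login lines set up non-interactive Hugging Face authentication for the gated Llama 2 weights; the repository secret supplies the token, and --add-to-git-credential persists it through git's credential helper so later git-over-HTTPS fetches also authenticate. Outside CI, the equivalent manual steps are (placeholder token shown):

    git config --global credential.helper store
    huggingface-cli login --token hf_xxxxxxxx --add-to-git-credential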
10 changes: 6 additions & 4 deletions .github/workflows/test-mlperf-inference-mixtral.yml
@@ -5,18 +5,19 @@ name: MLPerf inference MIXTRAL-8x7B
 
 on:
   schedule:
-    - cron: "45 10 * * *" # 30th minute and 20th hour => 20:30 UTC => 2 AM IST
+    - cron: "32 22 * * *" # 30th minute and 20th hour => 20:30 UTC => 2 AM IST
 
 jobs:
   build_reference:
     if: github.repository_owner == 'gateoverflow'
-    runs-on: [ self-hosted, GO-spr, linux, x64 ]
+    runs-on: [ self-hosted, phoenix, linux, x64 ]
     strategy:
       fail-fast: false
       matrix:
         python-version: [ "3.12" ]
         backend: [ "pytorch" ]
         device: [ "cpu" ]
+        precision: [ "float16" ]
 
     steps:
       - name: Test MLPerf Inference MIXTRAL-8X7B reference implementation
@@ -26,7 +27,8 @@ jobs:
           export CM_REPOS=$HOME/GH_CM
           pip install cm4mlops
           pip install "huggingface_hub[cli]"
           git config --global credential.helper store
+          huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential
           cm pull repo
-          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=1 --target_qps=1 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes
-          cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - GO-i9" --quiet --submission_dir=$HOME/gh_action_submissions
+          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=1 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes
+          cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions
4 changes: 2 additions & 2 deletions .github/workflows/test-mlperf-inference-sdxl.yaml
@@ -1,7 +1,7 @@
 name: MLPerf inference SDXL
 on:
   schedule:
-    - cron: "30 2 * * *"
+    - cron: "19 17 * * *"
 
 jobs:
   build_reference:
@@ -21,5 +21,5 @@ jobs:
           export CM_REPOS=$HOME/GH_CM
           python3 -m pip install cm4mlops
           cm pull repo
-          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
+          cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --adr.mlperf-implementation.tags=_branch.dev --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
           cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions
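Note: the replacement command drops the fixed --target_qps and instead pins the MLPerf implementation to its dev branch through CM's dependency-override syntax: --adr.<name>.tags=<tags> injects extra variation tags into the named dependency when the script graph is resolved (the same mechanism the old llama2 command used with --adr.inference-src.tags=_repo....). A hedged, generic sketch of the mechanism (script tags illustrative):

    cm run script --tags=run-mlperf,inference,_submission,_short --adr.mlperf-implementation.tags=_branch.dev --quiet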
.github/workflows/test-nvidia-mlperf-inference-implementations.yml
@@ -2,25 +2,43 @@ name: MLPerf Inference Nvidia implementations
 
 on:
   schedule:
-    - cron: "49 19 * * *" #to be adjusted
+    - cron: "54 22 * * *" #to be adjusted
 
 jobs:
-  build_nvidia:
+  run_nvidia:
     if: github.repository_owner == 'gateoverflow'
-    runs-on: [ self-hosted, linux, x64, GO-spr ]
+    runs-on:
+      - self-hosted
+      - linux
+      - x64
+      - cuda
+      - ${{ matrix.system }}
     strategy:
       fail-fast: false
       matrix:
+        system: [ "GO-spr", "phoenix", "i9" ]
        python-version: [ "3.12" ]
-        model: [ "resnet50", "retinanet", "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9" ]
+        model: [ "resnet50", "retinanet", "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9", "sdxl" ]
+        exclude:
+          - model: gptj-99.9
 
     steps:
       - name: Test MLPerf Inference NVIDIA ${{ matrix.model }}
         run: |
+          # Set hw_name based on matrix.system
+          if [ "${{ matrix.system }}" = "GO-spr" ]; then
+            hw_name="RTX4090x2"
+          else
+            hw_name="RTX4090x1"
+          fi
+
           if [ -f "gh_action/bin/deactivate" ]; then source gh_action/bin/deactivate; fi
           python3 -m venv gh_action
           source gh_action/bin/activate
           export CM_REPOS=$HOME/GH_CM
           pip install --upgrade cm4mlops
           pip install tabulate
-          cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=RTX4090x2 --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cuda --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet
-          cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on NVIDIA_RTX4090x2" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=RTX4090x2
+          cm pull repo
+
+          cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --adr.submission-checker-src.tags=_branch.dev --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cuda --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet
+
+          cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name
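Note: the restructured runs-on block is what lets this one workflow fan out over several self-hosted machines: ${{ matrix.system }} expands to a runner label, so each matrix entry is routed to the runner carrying that label, and the shell step then maps the system name to the matching --hw_name. A minimal standalone sketch of the pattern (job and label names illustrative):

    jobs:
      bench:
        runs-on:
          - self-hosted
          - ${{ matrix.system }}
        strategy:
          matrix:
            system: [ machine-a, machine-b ]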
4 changes: 2 additions & 2 deletions .github/workflows/test-scc24-sdxl.yaml
@@ -2,7 +2,7 @@ name: MLPerf inference SDXL (SCC)
 
 on:
   schedule:
-    - cron: "35 19 * * *"
+    - cron: "20 01 * * *"
 
 jobs:
   build_reference:
@@ -54,5 +54,5 @@ jobs:
           cm pull repo
           cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean
           cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
-          cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results
+          cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results
           cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-0.3.25
+0.3.26