Merge pull request #495 from GATEOverflow/mlperf-inference

Fixes for Nvidia MLPerf inference gptj,sdxl
mlcommons · Nov 7, 2024 · 05f4bf8 · 05f4bf8
2 parents fb357c0 + 69daefe
commit 05f4bf8
Show file tree

Hide file tree

Showing 5 changed files with 32 additions and 5 deletions.
diff --git a/.github/workflows/test-mlperf-inference-intel.yml b/.github/workflows/test-mlperf-inference-intel.yml
@@ -0,0 +1,25 @@
+name: MLPerf Inference Intel implementations
+
+on:
+  schedule:
+    - cron: "49 2 * * *"  # daily at 02:49 (GitHub evaluates cron in UTC); to be adjusted
+
+jobs:
+  build_intel:  # Intel CPU implementation; was mislabelled build_nvidia
+      if: github.repository_owner == 'gateoverflow'  # only run for the gateoverflow owner
+      runs-on: [ self-hosted, linux, x64, GO-spr ]
+      strategy:
+        fail-fast: false  # let the other model finish even if one fails
+        matrix:
+          python-version: [ "3.12" ]  # NOTE(review): not referenced by the step below
+          model: [ "resnet50", "bert-99" ]
+      steps:
+      - name: Test MLPerf Inference Intel ${{ matrix.model }}
+        run: |
+          if [ -f "gh_action/bin/deactivate" ]; then source gh_action/bin/deactivate; fi
+          python3 -m venv gh_action
+          source gh_action/bin/activate
+          export CM_REPOS=$HOME/GH_CM
+          pip install --upgrade cm4mlops
+          cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid  --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c  --implementation=intel    --backend=pytorch    --category=datacenter --division=open --scenario=Offline  --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cpu  --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean  --docker --quiet
+          cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c
diff --git a/.github/workflows/test-nvidia-mlperf-implementation.yml b/.github/workflows/test-nvidia-mlperf-implementation.yml
@@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations
 
 on:
   schedule:
-    - cron: "29 20 * * *" #to be adjusted
+    - cron: "49 19 * * *" #to be adjusted
 
 jobs:
   build_nvidia:

diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml
@@ -29,7 +29,7 @@ jobs:
         cm pull repo
         cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
         cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
-        cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results
+        cm run script --tags=generate,inference,submission --clean  --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results
         cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions
         
   build_nvidia:

diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -926,12 +926,13 @@ variations:
     env:
       CM_MODEL: stable-diffusion-xl
       CM_NUM_THREADS: "1"
+    adr:
+      mlperf-implementation:
+        tags: _branch.dev
     deps:
       - tags: get,generic-python-lib,_package.diffusers
         names:
           - diffusers
-        version_max: "0.30.3"
-        version_max_usable: "0.30.3"
       - tags: get,generic-python-lib,_package.transformers
         names:
           - transformers

diff --git a/script/pull-git-repo/run.sh b/script/pull-git-repo/run.sh
@@ -11,6 +11,7 @@ test $? -eq 0 || exit $?
 
 echo ${CM_GIT_PULL_CMD}
 eval ${CM_GIT_PULL_CMD}
-test $? -eq 0 || exit $?
+# Don't fail if there are local changes, but report the failed pull instead of hiding it.
+pull_status=$?; test ${pull_status} -eq 0 || echo "WARNING: git pull command failed with exit code ${pull_status}; continuing" >&2
 
 cd $CUR_DIR