Skip to content

Commit

Permalink
Merge pull request #495 from GATEOverflow/mlperf-inference
Browse files Browse the repository at this point in the history
Fixes for Nvidia MLPerf inference gptj,sdxl
  • Loading branch information
arjunsuresh authored Nov 7, 2024
2 parents fb357c0 + 69daefe commit 05f4bf8
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 5 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/test-mlperf-inference-intel
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Scheduled run of the Intel MLPerf inference implementation (CPU, PyTorch
# backend) inside Docker on a self-hosted runner. Each matrix model is
# benchmarked and the generated submission is pushed to the unofficial
# submissions repository named in the final command.
name: MLPerf Inference Intel implementations

on:
  schedule:
    - cron: "49 2 * * *"  # to be adjusted

jobs:
  build_intel:
    # Skip the job on forks owned by anyone other than gateoverflow.
    if: github.repository_owner == 'gateoverflow'
    runs-on: [self-hosted, linux, x64, GO-spr]
    strategy:
      # Let the remaining models finish even if one of them fails.
      fail-fast: false
      matrix:
        # NOTE(review): python-version is not referenced by any step below;
        # the venv is created with whatever `python3` the runner provides.
        python-version: ["3.12"]
        model: ["resnet50", "bert-99"]
    steps:
      - name: Test MLPerf Inference Intel ${{ matrix.model }}
        run: |
          # Leave any virtualenv left active by a previous run before
          # (re)creating and activating the one used by this job.
          if [ -f "gh_action/bin/deactivate" ]; then source gh_action/bin/deactivate; fi
          python3 -m venv gh_action
          source gh_action/bin/activate
          export CM_REPOS=$HOME/GH_CM
          pip install --upgrade cm4mlops
          # --hw_name must match the value used by the push command below so
          # both steps refer to the same system (was the GPU label RTX4090x2).
          cm run script \
            --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev \
            --preprocess_submission=yes \
            --execution_mode=valid \
            --pull_changes=yes \
            --pull_inference_changes=yes \
            --model=${{ matrix.model }} \
            --submitter="MLCommons" \
            --hw_name=IntelSPR.24c \
            --implementation=intel \
            --backend=pytorch \
            --category=datacenter \
            --division=open \
            --scenario=Offline \
            --docker_dt=yes \
            --docker_it=no \
            --docker_cm_repo=gateoverflow@cm4mlops \
            --adr.compiler.tags=gcc \
            --device=cpu \
            --results_dir=$HOME/gh_action_results \
            --submission_dir=$HOME/gh_action_submissions \
            --clean \
            --docker \
            --quiet
          cm run script \
            --tags=push,github,mlperf,inference,submission \
            --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 \
            --repo_branch=main \
            --commit_message="Results from GH action on SPR.24c" \
            --quiet \
            --submission_dir=$HOME/gh_action_submissions \
            --hw_name=IntelSPR.24c
2 changes: 1 addition & 1 deletion .github/workflows/test-nvidia-mlperf-implementation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations

on:
schedule:
- cron: "29 20 * * *" #to be adjusted
- cron: "49 19 * * *" #to be adjusted

jobs:
build_nvidia:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-scc24-sdxl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
cm pull repo
cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results
cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results
cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions
build_nvidia:
Expand Down
5 changes: 3 additions & 2 deletions script/app-mlperf-inference-mlcommons-python/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -926,12 +926,13 @@ variations:
env:
CM_MODEL: stable-diffusion-xl
CM_NUM_THREADS: "1"
adr:
mlperf-implementation:
tags: _branch.dev
deps:
- tags: get,generic-python-lib,_package.diffusers
names:
- diffusers
version_max: "0.30.3"
version_max_usable: "0.30.3"
- tags: get,generic-python-lib,_package.transformers
names:
- transformers
Expand Down
3 changes: 2 additions & 1 deletion script/pull-git-repo/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ test $? -eq 0 || exit $?

echo ${CM_GIT_PULL_CMD}
eval ${CM_GIT_PULL_CMD}
test $? -eq 0 || exit $?
#don't fail if there are local changes
#test $? -eq 0 || exit $?

cd $CUR_DIR

0 comments on commit 05f4bf8

Please sign in to comment.