Skip to content

Commit

Permalink
Merge pull request #562 from GATEOverflow/mlperf-inference
Browse files Browse the repository at this point in the history
Added new docker image name for Nvidia MLPerf inference LLM models
  • Loading branch information
arjunsuresh authored Nov 16, 2024
2 parents 7d34691 + ea8ac4b commit 8e75c89
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: MLPerf Inference AMD implementations

on:
schedule:
- cron: "29 4 * * *" #to be adjusted
- cron: "46 11 * * *" #to be adjusted

jobs:
run_amd:
Expand All @@ -21,6 +21,6 @@ jobs:
source gh_action/bin/activate
export CM_REPOS=$HOME/GH_CM
pip install --upgrade cm4mlops
pip install tabulate
cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet
cm pull repo
cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes
# cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations

on:
schedule:
- cron: "05 02 * * *" #to be adjusted
- cron: "54 11 * * *" #to be adjusted

jobs:
run_nvidia:
Expand Down
2 changes: 2 additions & 0 deletions script/app-mlperf-inference/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ variations:

nvidia-original,r4.1-dev_default,gptj_:
docker:
image_name: mlperf-inference-nvidia-v4.1-dev-llm
deps:
- tags: get,ml-model,gptj,_nvidia,_fp8
update_tags_from_env_with_prefix:
Expand All @@ -356,6 +357,7 @@ variations:

nvidia-original,r4.1-dev_default,llama2-70b_:
docker:
image_name: mlperf-inference-nvidia-v4.1-dev-llm
deps:
- tags: get,ml-model,llama2-70b,_nvidia,_fp8
update_tags_from_env_with_prefix:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,18 @@ bert-99:
target_qps: 4000.0
Server:
target_qps: 3800.0
SingleStream:
target_latency: 1
bert-99.9:
Offline:
target_qps: 2000.0
Server:
target_qps: 2000.0
SingleStream:
target_latency: 10
target_qps: 1600.0
resnet50:
MultiStream:
target_latency: '432111'
Offline:
target_qps: '42959.4'
Server:
target_qps: 35000.0
SingleStream:
target_latency: '226895'
retinanet:
MultiStream:
target_latency: 80
Offline:
target_qps: 700.0
Server:
target_qps: 650.0
SingleStream:
target_latency: 10
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,18 @@ bert-99:
target_qps: 4000.0
Server:
target_qps: 3800.0
SingleStream:
target_latency: 1
bert-99.9:
Offline:
target_qps: 2000.0
Server:
target_qps: 2000.0
SingleStream:
target_latency: 10
target_qps: 1600.0
resnet50:
MultiStream:
target_latency: '432111'
Offline:
target_qps: '42959.4'
Server:
target_qps: 35000.0
SingleStream:
target_latency: '226895'
retinanet:
MultiStream:
target_latency: 80
Offline:
target_qps: 700.0
Server:
target_qps: 650.0
SingleStream:
target_latency: 10

0 comments on commit 8e75c89

Please sign in to comment.