Skip to content

Commit

Permalink
Merge pull request #562 from GATEOverflow/mlperf-inference
Browse files Browse the repository at this point in the history
Added new docker image name for Nvidia MLPerf inference LLM models
  • Loading branch information
arjunsuresh authored Nov 16, 2024
2 parents 7d34691 + ea8ac4b commit 8e75c89
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: MLPerf Inference AMD implementations

on:
schedule:
- cron: "29 4 * * *" #to be adjusted
- cron: "46 11 * * *" #to be adjusted

jobs:
run_amd:
Expand All @@ -21,6 +21,6 @@ jobs:
source gh_action/bin/activate
export CM_REPOS=$HOME/GH_CM
pip install --upgrade cm4mlops
pip install tabulate
cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet
cm pull repo
cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes
# cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations

on:
schedule:
- cron: "05 02 * * *" #to be adjusted
- cron: "54 11 * * *" #to be adjusted

jobs:
run_nvidia:
Expand Down
2 changes: 2 additions & 0 deletions script/app-mlperf-inference/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,7 @@ variations:

nvidia-original,r4.1-dev_default,gptj_:
docker:
image_name: mlperf-inference-nvidia-v4.1-dev-llm
deps:
- tags: get,ml-model,gptj,_nvidia,_fp8
update_tags_from_env_with_prefix:
Expand All @@ -356,6 +357,7 @@ variations:

nvidia-original,r4.1-dev_default,llama2-70b_:
docker:
image_name: mlperf-inference-nvidia-v4.1-dev-llm
deps:
- tags: get,ml-model,llama2-70b,_nvidia,_fp8
update_tags_from_env_with_prefix:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,18 @@ bert-99:
target_qps: 4000.0
Server:
target_qps: 3800.0
SingleStream:
target_latency: 1
bert-99.9:
Offline:
target_qps: 2000.0
Server:
target_qps: 2000.0
SingleStream:
target_latency: 10
target_qps: 1600.0
resnet50:
MultiStream:
target_latency: '432111'
Offline:
target_qps: '42959.4'
Server:
target_qps: 35000.0
SingleStream:
target_latency: '226895'
retinanet:
MultiStream:
target_latency: 80
Offline:
target_qps: 700.0
Server:
target_qps: 650.0
SingleStream:
target_latency: 10
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,18 @@ bert-99:
target_qps: 4000.0
Server:
target_qps: 3800.0
SingleStream:
target_latency: 1
bert-99.9:
Offline:
target_qps: 2000.0
Server:
target_qps: 2000.0
SingleStream:
target_latency: 10
target_qps: 1600.0
resnet50:
MultiStream:
target_latency: '432111'
Offline:
target_qps: '42959.4'
Server:
target_qps: 35000.0
SingleStream:
target_latency: '226895'
retinanet:
MultiStream:
target_latency: 80
Offline:
target_qps: 700.0
Server:
target_qps: 650.0
SingleStream:
target_latency: 10

0 comments on commit 8e75c89

Please sign in to comment.