From 58d9b01fd0f39948a4fb21b54a5f3e2d57c0c704 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Sat, 16 Nov 2024 17:08:48 +0530
Subject: [PATCH 1/5] Skip docker run command for AMD MLPerf inference gh action

---
 .github/workflows/test-amd-mlperf-inference-implementations.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-amd-mlperf-inference-implementations.yml b/.github/workflows/test-amd-mlperf-inference-implementations.yml
index 7eb4be232a..c9a005510f 100644
--- a/.github/workflows/test-amd-mlperf-inference-implementations.yml
+++ b/.github/workflows/test-amd-mlperf-inference-implementations.yml
@@ -22,5 +22,5 @@ jobs:
         export CM_REPOS=$HOME/GH_CM
         pip install --upgrade cm4mlops
         pip install tabulate
-        cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet
+        cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes
       #  cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c

From 6c457dcd19607a2b8064f3280efbcac759dac02c Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Sat, 16 Nov 2024 11:40:37 +0000
Subject: [PATCH 2/5] Update test-amd-mlperf-inference-implementations.yml

---
 .../workflows/test-amd-mlperf-inference-implementations.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-amd-mlperf-inference-implementations.yml b/.github/workflows/test-amd-mlperf-inference-implementations.yml
index c9a005510f..a01b8cd266 100644
--- a/.github/workflows/test-amd-mlperf-inference-implementations.yml
+++ b/.github/workflows/test-amd-mlperf-inference-implementations.yml
@@ -2,7 +2,7 @@ name: MLPerf Inference AMD implementations

 on:
   schedule:
-    - cron: "29 4 * * *" #to be adjusted
+    - cron: "46 11 * * *" #to be adjusted

 jobs:
   run_amd:
@@ -21,6 +21,6 @@ jobs:
         source gh_action/bin/activate
         export CM_REPOS=$HOME/GH_CM
         pip install --upgrade cm4mlops
-        pip install tabulate
+        cm pull repo
         cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes
       #  cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c
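PATCH 1/5 and 2/5 switch the nightly AMD job to a build-only flow via --docker_skip_run_cmd=yes: the docker image is prepared but the long benchmark run is not launched. Below is a minimal Python sketch of that build-but-don't-run pattern; the image tag, helper name, and run command are illustrative placeholders, not the actual cm4mlops internals.

```python
import shlex
import subprocess

def run_mlperf_in_docker(image_tag: str, run_cmd: str,
                         skip_run_cmd: bool = False,
                         dry_run: bool = True) -> None:
    """Build the benchmark image, then either execute the benchmark command
    in a container or stop after the build when skip_run_cmd is set.
    Illustrative sketch only: the real flow is orchestrated by CM scripts."""
    build = ["docker", "build", "-t", image_tag, "."]
    run = ["docker", "run", "--rm", image_tag, "bash", "-c", run_cmd]

    for cmd in ([build] if skip_run_cmd else [build, run]):
        if dry_run:
            print("would run:", shlex.join(cmd))
        else:
            subprocess.run(cmd, check=True)

    if skip_run_cmd:
        # Mirrors --docker_skip_run_cmd=yes: the image (and its cached
        # layers) is kept warm, but the benchmark itself is skipped.
        print(f"image {image_tag} built; benchmark run command skipped")

# Build-only, as the nightly GH action now does:
run_mlperf_in_docker("mlperf-inference-amd:dev", "cm run script ...", skip_run_cmd=True)
```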
From 6d10ea6a30d1f2b684093e113bccb36c1a145e19 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Sat, 16 Nov 2024 11:47:44 +0000
Subject: [PATCH 3/5] Update test-nvidia-mlperf-inference-implementations.yml

---
 .../workflows/test-nvidia-mlperf-inference-implementations.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
index a51dc9bcec..a13eeba133 100644
--- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
+++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
@@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations

 on:
   schedule:
-    - cron: "05 02 * * *" #to be adjusted
+    - cron: "54 11 * * *" #to be adjusted

 jobs:
   run_nvidia:
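PATCH 2/5 and 3/5 move both nightly cron triggers. GitHub Actions evaluates schedule crons in UTC, so "54 11 * * *" fires at 11:54 UTC daily. The sketch below, assuming only the fixed-minute/fixed-hour "M H * * *" shape these workflows use (it is not a general cron parser), computes the next firing time.

```python
from datetime import datetime, timedelta, timezone

def next_cron_run(expr: str, now: datetime) -> datetime:
    """Next UTC time matching a simple 'M H * * *' cron expression
    (fixed minute and hour, every day). Illustrative helper only."""
    minute, hour, *rest = expr.split()
    if rest != ["*", "*", "*"]:
        raise ValueError("only 'M H * * *' expressions are supported here")
    candidate = now.replace(minute=int(minute), hour=int(hour),
                            second=0, microsecond=0)
    if candidate <= now:
        candidate += timedelta(days=1)  # today's slot already passed
    return candidate

# The AMD job at 11:46 UTC and the Nvidia job at 11:54 UTC:
now = datetime.now(timezone.utc)
print(next_cron_run("46 11 * * *", now))
print(next_cron_run("54 11 * * *", now))
```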
From a1492c24841ad7f3220b307bc190a381fe17cd3f Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Sat, 16 Nov 2024 18:28:04 +0000
Subject: [PATCH 4/5] Fixes rtx4090x1 configs

---
 .../tensorrt-framework/default-config.yaml        | 14 +-------------
 .../framework-version-default/default-config.yaml | 14 +-------------
 2 files changed, 2 insertions(+), 26 deletions(-)

diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml
index 4aabb4b738..3a872ef0b4 100644
--- a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml
+++ b/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml
@@ -13,30 +13,18 @@ bert-99:
     target_qps: 4000.0
   Server:
     target_qps: 3800.0
-  SingleStream:
-    target_latency: 1
 bert-99.9:
   Offline:
     target_qps: 2000.0
   Server:
-    target_qps: 2000.0
-  SingleStream:
-    target_latency: 10
+    target_qps: 1600.0
 resnet50:
-  MultiStream:
-    target_latency: '432111'
   Offline:
     target_qps: '42959.4'
   Server:
     target_qps: 35000.0
-  SingleStream:
-    target_latency: '226895'
 retinanet:
-  MultiStream:
-    target_latency: 80
   Offline:
     target_qps: 700.0
   Server:
     target_qps: 650.0
-  SingleStream:
-    target_latency: 10
diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml
index 4aabb4b738..3a872ef0b4 100644
--- a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml
+++ b/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml
@@ -13,30 +13,18 @@ bert-99:
     target_qps: 4000.0
   Server:
     target_qps: 3800.0
-  SingleStream:
-    target_latency: 1
 bert-99.9:
   Offline:
     target_qps: 2000.0
   Server:
-    target_qps: 2000.0
-  SingleStream:
-    target_latency: 10
+    target_qps: 1600.0
 resnet50:
-  MultiStream:
-    target_latency: '432111'
   Offline:
     target_qps: '42959.4'
   Server:
     target_qps: 35000.0
-  SingleStream:
-    target_latency: '226895'
 retinanet:
-  MultiStream:
-    target_latency: 80
   Offline:
     target_qps: 700.0
   Server:
     target_qps: 650.0
-  SingleStream:
-    target_latency: 10

From 7031377230fd29958baee1ffc67d1438323b7ab4 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Sat, 16 Nov 2024 18:33:02 +0000
Subject: [PATCH 5/5] Added docker image names for nvidia mlperf inference llm

---
 script/app-mlperf-inference/_cm.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml
index 3e6dae0f4d..a42ed319d6 100644
--- a/script/app-mlperf-inference/_cm.yaml
+++ b/script/app-mlperf-inference/_cm.yaml
@@ -333,6 +333,7 @@ variations:

   nvidia-original,r4.1-dev_default,gptj_:
     docker:
+      image_name: mlperf-inference-nvidia-v4.1-dev-llm
       deps:
       - tags: get,ml-model,gptj,_nvidia,_fp8
         update_tags_from_env_with_prefix:
@@ -356,6 +357,7 @@ variations:

   nvidia-original,r4.1-dev_default,llama2-70b_:
     docker:
+      image_name: mlperf-inference-nvidia-v4.1-dev-llm
       deps:
       - tags: get,ml-model,llama2-70b,_nvidia,_fp8
         update_tags_from_env_with_prefix:
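PATCH 4/5 trims the RTX4090x1 SUT configs to Offline and Server targets (and lowers the bert-99.9 Server target to 1600 QPS), so consumers of these configs must tolerate scenarios with no explicit entry. A hedged sketch of such a lookup with fallback follows; the trimmed YAML stand-in and the helper are illustrative, not the get-mlperf-inference-sut-configs implementation, and it assumes PyYAML is installed.

```python
import yaml  # pip install pyyaml

# Trimmed stand-in for the RTX4090x1 default-config.yaml after PATCH 4/5:
# only Offline and Server targets remain for these models.
config_text = """
bert-99.9:
  Offline:
    target_qps: 2000.0
  Server:
    target_qps: 1600.0
retinanet:
  Offline:
    target_qps: 700.0
  Server:
    target_qps: 650.0
"""

def target_qps(config: dict, model: str, scenario: str):
    """Return the tuned target QPS, or None when a scenario (e.g. the
    removed SingleStream entries) has no explicit target, so callers
    can fall back to a default. Illustrative helper only."""
    return config.get(model, {}).get(scenario, {}).get("target_qps")

config = yaml.safe_load(config_text)
print(target_qps(config, "bert-99.9", "Server"))        # 1600.0
print(target_qps(config, "bert-99.9", "SingleStream"))  # None -> use default
```

PATCH 5/5 then pins a shared docker image name (mlperf-inference-nvidia-v4.1-dev-llm) for the gptj and llama2-70b variations, so both LLM workloads reuse one prebuilt image instead of each deriving its own tag.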