Skip to content

Commit

Permalink
Merge pull request #444 from mlcommons/mlperf-inference
Browse files Browse the repository at this point in the history
Sync: Mlperf inference
  • Loading branch information
arjunsuresh authored Oct 30, 2024
2 parents b209819 + 15c3f96 commit a29994d
Show file tree
Hide file tree
Showing 15 changed files with 190 additions and 27 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/run-individual-script-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# This workflow will run configured tests for any updated CM scripts
name: Individual CM script Tests

on:
  pull_request:
    branches: [ "main", "mlperf-inference", "dev" ]
    paths:
      - 'script/**_cm.json'
      # Watch both .yml and .yaml: tests/script/process_tests.py only
      # accepts _cm.json and _cm.yaml, so without the _cm.yaml pattern
      # those metadata files would never trigger this workflow.
      - 'script/**_cm.yml'
      - 'script/**_cm.yaml'

jobs:
  run-script-tests:
    runs-on: ubuntu-latest
    steps:
      - name: 'Checkout'
        uses: actions/checkout@v4
        with:
          # Fetch one extra commit so the diff against the base has a parent.
          fetch-depth: 2
      - name: Get changed files
        id: getfile
        run: |
          git remote add upstream ${{ github.event.pull_request.base.repo.clone_url }}
          git fetch upstream
          echo "files=$(git diff upstream/${{ github.event.pull_request.base.ref }} --name-only | xargs)" >> $GITHUB_OUTPUT
      - name: RUN Script Tests
        run: |
          echo ${{ steps.getfile.outputs.files }}
          for file in ${{ steps.getfile.outputs.files }}; do
            echo $file
          done
          python3 -m pip install cmind
          cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
          python3 tests/script/process_tests.py ${{ steps.getfile.outputs.files }}
42 changes: 42 additions & 0 deletions .github/workflows/test-cm-based-ubmission-generation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# This workflow will test the submission generation capability of CM for
# MLPerf inference results.

name: CM based Submission Generation

on:
  pull_request:
    branches: [ "main", "dev", "mlperf-inference" ]
    paths:
      # NOTE(review): this filter previously referenced
      # test-submission-generation-non-cm-based-benchmarks.yml (a different
      # workflow file), so edits to THIS workflow never triggered it.
      # Watch this workflow's own file instead.
      - '.github/workflows/test-cm-based-ubmission-generation.yml'
      # - '**' # kept on for all the path instead of submission generation CM script so that this could help in trapping any bugs in any recent submission checker modification also
      # - '!**.md'

jobs:
  Case-3:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python-version: [ "3.12" ]
        division: ["closed", "open"]
        category: ["datacenter", "edge"]
        # Each single-key exclude removes EVERY combination containing that
        # value, so only ubuntu-latest / closed / datacenter actually runs.
        exclude:
          - os: macos-latest
          - os: windows-latest
          - division: "open"
          - category: "edge"
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python3 -m pip install cmind
          cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
      - name: Pull repo where test cases are uploaded
        run: |
          cm pull repo anandhu-eng@inference --checkout=submission-generation-tests
      - name: Submission generation(model_mapping.json not present but model name is matching with the official one in submission checker) - ${{ matrix.category }} ${{ matrix.division }}
        run: |
          cm run script --tags=generate,inference,submission --clean --preprocess_submission=yes --results_dir=$HOME/CM/repos/anandhu-eng@inference/case-3/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --category=${{ matrix.category }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: MLPerf loadgen with HuggingFace bert onnx fp32 squad model

on:
pull_request:
branches: [ "main", "dev" ]
branches: [ "main", "dev", "mlperf-inference" ]
paths:
- '.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml'
- '**'
Expand All @@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ "3.12" ]
python-version: [ "3.10", "3.12" ]

steps:
- uses: actions/checkout@v3
Expand All @@ -30,7 +30,6 @@ jobs:
run: |
python3 -m pip install cmind
cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
cm run script --quiet --tags=get,sys-utils-cm
- name: Test MLPerf loadgen with HuggingFace bert onnx fp32 squad model
run: |
cmr "python app loadgen-generic _onnxruntime _custom _huggingface _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1" --adr.hf-downloader.model_filename=model.onnx --quiet
cm run script --tags=python,app,loadgen-generic,_onnxruntime,_custom,_huggingface,_model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1 --quiet
2 changes: 1 addition & 1 deletion .github/workflows/test-nvidia-mlperf-implementation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations

on:
schedule:
- cron: "04 18 * * *" #to be adjusted
- cron: "19 11 * * *" #to be adjusted

jobs:
build_nvidia:
Expand Down
40 changes: 38 additions & 2 deletions automation/script/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2425,10 +2425,46 @@ def test(self, i):

alias = meta.get('alias','')
uid = meta.get('uid','')

if console:
logging.info(path)
logging.info(' Test: TBD')
test_config = meta.get('tests', '')
if test_config:
logging.info(test_config)
test_all_variations = test_config.get('test-all-variations', False)
use_docker = test_config.get('use_docker', False)
if test_all_variations:
variations = meta.get("variations")
individual_variations = [ v for v in variations if variations[v].get('group', '') == '' and str(variations[v].get('exclude-in-test', '')).lower() not in [ "1", "true", "yes" ] ]
tags_string = ",".join(meta.get("tags"))
for variation in individual_variations:
run_tags = f"{tags_string},_{variation}"
if use_docker:
docker_images = test_config.get('docker_images', [ "ubuntu-22.04" ])
for docker_image in docker_images:
ii = {'action':'docker',
'automation':'script',
'tags': run_tags,
'quiet': i.get('quiet'),
'docker_image': docker_image,
'docker_image_name': alias
}
if i.get('docker_cm_repo', '') != '':
ii['docker_cm_repo'] = i['docker_cm_repo']
if i.get('docker_cm_repo_branch', '') != '':
ii['docker_cm_repo_branch'] = i['docker_cm_repo_branch']

r = self.cmind.access(ii)
if r['return'] > 0:
return r
else:
r = self.cmind.access({'action':'run',
'automation':'script',
'tags': run_tags,
'quiet': i.get('quiet') })
if r['return'] > 0:
return r

logging.info(' Test: WIP')


return {'return':0, 'list': lst}
Expand Down
2 changes: 1 addition & 1 deletion automation/script/module_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1692,7 +1692,7 @@ def docker(i):
env=i.get('env', {})

noregenerate_docker_file = i.get('docker_noregenerate', False)
norecreate_docker_image = i.get('docker_norecreate', False)
norecreate_docker_image = i.get('docker_norecreate', True)

if i.get('docker_skip_build', False):
noregenerate_docker_file = True
Expand Down
8 changes: 5 additions & 3 deletions script/generate-mlperf-inference-submission/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def fill_from_json(file_path, keys, sut_info):
with open(file_path, 'r') as f:
data = json.load(f)
for key in keys:
if key in data and sut_info[key] is None:
if key in data and (sut_info[key] is None or sut_info[key] == "default"):
sut_info[key] = data[key]
elif key in data and sut_info[key] != data[key]:
return -1 # error saying there is a mismatch in the value of a key
Expand Down Expand Up @@ -149,7 +149,8 @@ def generate_submission(i):
"implementation": None,
"device": None,
"framework": None,
"run_config": None
"framework_version": "default",
"run_config": "default"
} # variable to store the system meta

model_mapping_combined = {} # to store all the model mapping related to an SUT
Expand Down Expand Up @@ -209,6 +210,7 @@ def generate_submission(i):
implementation = sut_info["implementation"]
device = sut_info["device"]
framework = sut_info["framework"].replace(" ","_")
framework_version = sut_info["framework_version"]
run_config = sut_info["run_config"]
new_res = f"{system}-{implementation}-{device}-{framework}-{run_config}"
else:
Expand All @@ -234,7 +236,7 @@ def generate_submission(i):
system_meta_default['framework'] = framework + " " + framework_version
else:
print(parts)
return {'return': 1, 'error': f"The required details for generating the inference submission:\n1.system_name\n2.implementation\n3.framework\n4.run_config\nInclude a cm-sut-info.json file with the above content in {result_path}"}
return {'return': 1, 'error': f"The required details for generating the inference submission:\n1.hardware_name\n2.implementation\n3.Device\n4.framework\n5.framework_version\n6.run_config\nInclude a cm-sut-info.json or sut-info.json file with the above content in {result_path}"}

platform_prefix = inp.get('platform_prefix', '')
if platform_prefix:
Expand Down
7 changes: 7 additions & 0 deletions script/get-generic-sys-util/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -721,5 +721,12 @@
}
}
}
},
"tests": {
"test-all-variations": "yes",
"use_docker": "yes",
"docker_images": [
"ubuntu-22.04"
]
}
}
23 changes: 18 additions & 5 deletions script/get-generic-sys-util/run.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
#!/bin/bash
# Safe execution of a command stored in a variable.
cmd="${CM_SYS_UTIL_INSTALL_CMD}"
echo "$cmd"

# Run the command and capture its exit status IMMEDIATELY: $? is clobbered
# by every subsequent command (echo, [[ ]]), so the original
# `exit $?` in the failure branch exited with the status of the preceding
# test, not the failed command's real status.
eval "$cmd"
status=$?

if [[ $status -ne 0 ]]; then
  echo "Command failed with status $status"
  if [[ "${CM_TMP_FAIL_SAFE}" == 'yes' ]]; then
    # Fail-safe mode: report success so dependent steps keep going.
    echo "Fail-safe is enabled, exiting with status 0"
    exit 0
  fi
  # Propagate the command's actual failure status.
  exit $status
fi

exit 0
11 changes: 6 additions & 5 deletions script/get-ml-model-huggingface-zoo/download_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@

model_filenames = model_filename.split(',') if ',' in model_filename else [model_filename]

# First must be model
base_model_filename = model_filenames[0]
base_model_filepath = None

files = []
if full_subfolder!='':
Expand Down Expand Up @@ -93,15 +92,17 @@ def list_hf_files(path):
xrevision = None if revision == '' else revision
xsubfolder = None if subfolder == '' else subfolder

hf_hub_download(repo_id=model_stub,
downloaded_path = hf_hub_download(repo_id=model_stub,
subfolder=xsubfolder,
filename=model_filename,
force_filename=model_filename,
revision=xrevision,
cache_dir=os.getcwd())
print(downloaded_path)
if not base_model_filepath:
base_model_filepath = downloaded_path


print ('')

with open('tmp-run-env.out', 'w') as f:
f.write(f"CM_ML_MODEL_FILE_WITH_PATH={os.path.join(os.getcwd(),base_model_filename)}")
f.write(f"CM_ML_MODEL_FILE_WITH_PATH={base_model_filepath}")
5 changes: 4 additions & 1 deletion script/get-platform-details/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,10 @@
"linux"
]
},
"tags": "get,sys-util,generic,_linux-tools"
"tags": "get,sys-util,generic,_linux-tools",
"env": {
"CM_TMP_FAIL_SAFE": "yes"
}
}
],
"tags": [
Expand Down
6 changes: 3 additions & 3 deletions script/get-platform-details/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ echo "------------------------------------------------------------" >> $OUTPUT_F

echo "14. cpupower frequency-info" >> $OUTPUT_FILE
eval "cpupower frequency-info" >> $OUTPUT_FILE
test $? -eq 0 || exit $?
test $? -eq 0 || echo "FAILED: cpupower frequency-info" >> $OUTPUT_FILE
echo "------------------------------------------------------------" >> $OUTPUT_FILE

echo "15. sysctl" >> $OUTPUT_FILE
Expand Down Expand Up @@ -120,7 +120,7 @@ echo "------------------------------------------------------------" >> $OUTPUT_F
echo "21. dmidecode" >> $OUTPUT_FILE
if [[ ${CM_SUDO_USER} == "yes" ]]; then
eval "${CM_SUDO} dmidecode" >> $OUTPUT_FILE
test $? -eq 0 || exit $?
test $? -eq 0 || echo "FAILED: dmidecode" >> $OUTPUT_FILE
else
echo "Requires SUDO permission" >> $OUTPUT_FILE
fi
Expand All @@ -129,7 +129,7 @@ echo "------------------------------------------------------------" >> $OUTPUT_F
echo "22. BIOS" >> $OUTPUT_FILE
if [[ ${CM_SUDO_USER} == "yes" ]]; then
eval "${CM_SUDO} dmidecode -t bios" >> $OUTPUT_FILE
test $? -eq 0 || exit $?
test $? -eq 0 || echo "FAILED: dmidecode -t bios" >> $OUTPUT_FILE
else
echo "Requires SUDO permission" >> $OUTPUT_FILE
fi
Expand Down
3 changes: 2 additions & 1 deletion script/run-docker-container/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def preprocess(i):
if len(output_split) > 1 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', '')).lower() in [ "1", "true", "yes" ]: #container exists
out = output_split[1].split(" ")
existing_container_id = out[0]
print(f"Reusing existing container {existing_container_id}")
env['CM_DOCKER_CONTAINER_ID'] = existing_container_id

else:
Expand Down Expand Up @@ -302,7 +303,7 @@ def update_docker_info(env):
if env.get('CM_DOCKER_IMAGE_NAME', '') != '':
docker_image_name = env['CM_DOCKER_IMAGE_NAME']
else:
docker_image_name = 'cm-script-'+env['CM_DOCKER_RUN_SCRIPT_TAGS'].replace(',', '-').replace('_','-')
docker_image_name = 'cm-script-'+env['CM_DOCKER_RUN_SCRIPT_TAGS'].replace(',', '-').replace('_','-').replace('+','plus')
env['CM_DOCKER_IMAGE_NAME'] = docker_image_name

docker_image_tag_extra = env.get('CM_DOCKER_IMAGE_TAG_EXTRA', '-latest')
Expand Down
3 changes: 2 additions & 1 deletion script/run-mlperf-inference-app/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def preprocess(i):
docker_extra_input[k] = inp[k]
inp = {}
if str(docker_dt).lower() in ["yes", "true", "1"]:
env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'yes'
env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no' # turning it off for the first run and after that we turn it on

if env.get('CM_DOCKER_IMAGE_NAME', '') != '':
docker_extra_input['docker_image_name'] = env['CM_DOCKER_IMAGE_NAME']
Expand Down Expand Up @@ -256,6 +256,7 @@ def preprocess(i):
print(f"\nStop Running loadgen scenario: {scenario} and mode: {mode}")
return {'return': 0} # We run commands interactively inside the docker container
else:
env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'yes'
container_id = env_copy['CM_DOCKER_CONTAINER_ID']
env['CM_DOCKER_CONTAINER_ID'] = container_id
if state.get('docker', {}):
Expand Down
25 changes: 25 additions & 0 deletions tests/script/process_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import sys
import os
import cmind as cm
import check as checks
import json
import yaml

# Run the CM 'test' action for every changed script metadata file passed on
# the command line (invoked by .github/workflows/run-individual-script-tests.yml
# with the list of files changed in the pull request).
files = sys.argv[1:]

for file in files:
    print(file)
    # Only files under a script/ path are testable CM script metadata.
    if not os.path.isfile(file) or "script" not in file:
        continue
    # Accept .yml alongside .yaml so files matched by the workflow's
    # 'script/**_cm.yml' path filter are not silently skipped.
    if not file.endswith(("_cm.json", "_cm.yaml", "_cm.yml")):
        continue
    # Context manager closes the handle (the original leaked it), and the
    # else branch covers both .yaml and .yml (the original left `data`
    # undefined for anything that was neither .json nor .yaml).
    with open(file) as f:
        if file.endswith(".json"):
            data = json.load(f)
        else:
            data = yaml.safe_load(f)
    uid = data['uid']

    # Test the script identified by its uid; check_return raises/flags on
    # a non-zero CM return code.
    r = cm.access({'action': 'test', 'automation': 'script', 'artifact': uid, 'quiet': 'yes', 'out': 'con'})
    checks.check_return(r)

0 comments on commit a29994d

Please sign in to comment.