From d2f783387e0ec7383bd9bd82153b7974986c95a7 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Sat, 30 Nov 2024 06:40:17 +0100 Subject: [PATCH] Revert "Sync Mlperf inference (#630)" This reverts commit e277b67ca8120d7c7b95ec4eb4d22dd1f89c287d. --- .github/workflows/format.yml | 14 +- automation/script/README-extra.md | 1 - automation/script/module.py | 85 +---- .../README.md | 32 ++ .../_cm.json | 22 ++ .../README.md | 4 + .../_cm.yaml | 21 ++ .../README.md | 10 + .../_cm.yaml | 25 ++ .../README.md | 30 ++ .../_cm.json | 22 ++ .../connect-mlperf-with-medperf/README.md | 23 ++ .../connect-mlperf-with-medperf/_cm.json | 26 ++ .../README.md | 16 + .../_cm.json | 20 ++ .../README.md | 7 + .../_cm.json | 19 ++ .../README.md | 18 + .../_cm.json | 27 ++ .../README.md | 74 +++++ .../_cm.json | 27 ++ ...wd-benchmark-mlperf-bert-inference-cuda.md | 281 ++++++++++++++++ .../docs/generate-3d-unet-submission.md | 59 ++++ .../docs/generate-bert-submission.md | 80 +++++ .../docs/generate-resnet50-submission.md | 82 +++++ .../docs/generate-retinanet-submission.md | 67 ++++ .../docs/generate-rnnt-submission.md | 53 +++ .../docs/setup-aws-instance.md | 48 +++ .../docs/setup-gcp-instance.md | 35 ++ .../docs/setup-nvidia-jetson-orin.md | 53 +++ .../README.md | 83 +++++ .../_cm.json | 26 ++ .../docs/generate-3d-unet-submission.md | 67 ++++ .../docs/generate-bert-submission.md | 113 +++++++ .../docs/generate-resnet50-submission.md | 90 +++++ .../docs/generate-retinanet-submission.md | 75 +++++ .../docs/generate-rnnt-submission.md | 61 ++++ .../docs/setup-aws-instance.md | 50 +++ .../docs/setup-gcp-instance.md | 37 +++ .../docs/setup-nvidia-jetson-orin.md | 54 +++ .../README.md | 31 ++ .../_cm.json | 27 ++ .../README.md | 20 ++ .../_cm.json | 26 ++ .../README.md | 31 ++ .../_cm.json | 28 ++ .../README.md | 32 ++ .../_cm.json | 27 ++ .../README.md | 52 +++ .../_cm.json | 27 ++ .../README.md | 31 ++ .../_cm.json | 26 ++ .../README.md | 34 ++ .../_cm.json | 28 ++ .../README.md | 33 ++ .../_cm.json | 26 ++ .../README.md | 41 +++ .../_cm.json | 28 ++ .../README.md | 31 ++ .../_cm.json | 27 ++ .../README.md | 36 ++ .../_cm.json | 28 ++ .../repro-mlperf-inf-v3.0-orin/README.md | 16 + challenge/repro-mlperf-inf-v3.0-orin/_cm.json | 23 ++ .../README.md | 39 +++ .../_cm.json | 20 ++ .../README.md | 3 + .../repro-mlperf-inference-v4.0-2024/_cm.yaml | 25 ++ .../README.md | 4 + .../repro-mlperf-inference-v4.1-2024/_cm.yaml | 22 ++ .../README.md | 36 ++ .../_cm.json | 23 ++ .../README.md | 17 + .../_cm.json | 23 ++ challenge/run-mlperf@home-v3.1-cpu/README.md | 67 ++++ challenge/run-mlperf@home-v3.1-cpu/_cm.json | 21 ++ .../run-cpu-bert-99-deepsparse.md | 100 ++++++ ...cpu-dse-mobilenets-efficientnets-tflite.md | 77 +++++ challenge/run-mlperf@home-v3.1-gpu/README.md | 65 ++++ challenge/run-mlperf@home-v3.1-gpu/_cm.json | 20 ++ ...idia-gpu-bert-99-nvidia-docker-tensorrt.md | 193 +++++++++++ .../run-nvidia-gpu-gpt-j-6b-ref-pytorch.md | 314 ++++++++++++++++++ .../train-llm-for-cm-mlperf-2023/README.md | 20 ++ .../train-llm-for-cm-mlperf-2023/_cm.json | 21 ++ .../README.md | 93 ++++++ .../_cm.json | 16 + .../_cm.json | 17 + .../_cm.json | 17 + .../_cm.json | 17 + script/app-mlperf-inference/_cm.yaml | 2 - script/run-mlperf-inference-app/_cm.yaml | 1 - tests/script/check.py | 27 ++ tests/script/process_dockerfile.py | 33 ++ tests/script/process_readme.py | 27 ++ tests/script/process_tests.py | 38 +++ tests/script/test_deps.py | 25 ++ tests/script/test_docker.py | 23 ++ tests/script/test_features.py | 38 +++ 
tests/script/test_install.py | 15 + tests/test_cm.py | 17 + tests/test_search_speed.py | 26 ++ tests/tutorials/test_tutorial_retinanet.py | 37 +++ tests/tutorials/test_tutorial_tvm.py | 28 ++ tests/tutorials/test_tutorial_tvm_pip_ge.py | 26 ++ tests/tutorials/test_tutorial_tvm_pip_vm.py | 27 ++ 105 files changed, 4210 insertions(+), 96 deletions(-) create mode 100644 challenge/add-derived-metrics-to-mlperf-inference/README.md create mode 100644 challenge/add-derived-metrics-to-mlperf-inference/_cm.json create mode 100644 challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md create mode 100644 challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml create mode 100644 challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md create mode 100644 challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml create mode 100644 challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md create mode 100644 challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json create mode 100644 challenge/connect-mlperf-with-medperf/README.md create mode 100644 challenge/connect-mlperf-with-medperf/_cm.json create mode 100644 challenge/optimize-mlperf-inference-scc2023/README.md create mode 100644 challenge/optimize-mlperf-inference-scc2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-scc2024/README.md create mode 100644 challenge/optimize-mlperf-inference-scc2024/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v2.1-2022/README.md create mode 100644 challenge/optimize-mlperf-inference-v2.1-2022/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md create mode 100644 challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md 
create mode 100644 challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json create mode 100644 challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md create mode 100644 challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json create mode 100644 challenge/repro-mlperf-inf-v3.0-orin/README.md create mode 100644 challenge/repro-mlperf-inf-v3.0-orin/_cm.json create mode 100644 challenge/repro-mlperf-inference-retinanet-scc2022/README.md create mode 100644 challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json create mode 100644 challenge/repro-mlperf-inference-v4.0-2024/README.md create mode 100644 challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml create mode 100644 challenge/repro-mlperf-inference-v4.1-2024/README.md create mode 100644 challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml create mode 100644 challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md create mode 100644 challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json create mode 100644 challenge/reproduce-mlperf-training-v3.0-2023/README.md create mode 100644 challenge/reproduce-mlperf-training-v3.0-2023/_cm.json create mode 100644 challenge/run-mlperf@home-v3.1-cpu/README.md create mode 100644 challenge/run-mlperf@home-v3.1-cpu/_cm.json create mode 100644 challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md create mode 100644 challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md create mode 100644 challenge/run-mlperf@home-v3.1-gpu/README.md create mode 100644 challenge/run-mlperf@home-v3.1-gpu/_cm.json create mode 100644 challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md create mode 100644 
challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md create mode 100644 challenge/train-llm-for-cm-mlperf-2023/README.md create mode 100644 challenge/train-llm-for-cm-mlperf-2023/_cm.json create mode 100644 report/mlperf-inference-v3.1-analysis-ctuning/README.md create mode 100644 report/mlperf-inference-v3.1-analysis-ctuning/_cm.json create mode 100644 report/mlperf-inference-v3.1-press-release-ctuning/_cm.json create mode 100644 report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json create mode 100644 report/mlperf-inference-v4.0-press-release-ctuning/_cm.json create mode 100644 tests/script/check.py create mode 100644 tests/script/process_dockerfile.py create mode 100644 tests/script/process_readme.py create mode 100644 tests/script/process_tests.py create mode 100644 tests/script/test_deps.py create mode 100644 tests/script/test_docker.py create mode 100644 tests/script/test_features.py create mode 100644 tests/script/test_install.py create mode 100644 tests/test_cm.py create mode 100644 tests/test_search_speed.py create mode 100644 tests/tutorials/test_tutorial_retinanet.py create mode 100644 tests/tutorials/test_tutorial_tvm.py create mode 100644 tests/tutorials/test_tutorial_tvm_pip_ge.py create mode 100644 tests/tutorials/test_tutorial_tvm_pip_vm.py diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 66a1318632..c6937bac3a 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -26,22 +26,16 @@ jobs: python3 -m pip install autopep8 for FILE in $(git diff --name-only ${{ github.event.before }} | grep -E '.*\.py$') do - # Check if the file still exists in the working tree - if [ -f "$FILE" ]; then - autopep8 --in-place -a "$FILE" - git add "$FILE" - fi + autopep8 --in-place -a $FILE + git add $FILE done - name: Format modified C++ files run: | for FILE in $(git diff --name-only ${{ github.event.before }} | grep -E '.*\.(cc|cpp|h|hpp)$') do - # Check if the file still exists in the working tree - if [ -f "$FILE" ]; then - clang-format -i -style=file $FILE - git add $FILE - fi + clang-format -i -style=file $FILE + git add $FILE done - name: Commit and create PR diff --git a/automation/script/README-extra.md b/automation/script/README-extra.md index 7fc9820677..d63c5dc161 100644 --- a/automation/script/README-extra.md +++ b/automation/script/README-extra.md @@ -2,7 +2,6 @@ # CM "script" automation -
Click here to see the table of contents. diff --git a/automation/script/module.py b/automation/script/module.py index ee06a2c7b2..d63f607710 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -1311,7 +1311,7 @@ def _run(self, i): r = self._call_run_deps(prehook_deps, self.local_env_keys, local_env_keys_from_meta, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, - remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1372,7 +1372,7 @@ def _run(self, i): r = self._call_run_deps(posthook_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, - remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1383,7 +1383,7 @@ def _run(self, i): # Check chain of post dependencies on other CM scripts r = self._call_run_deps(post_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, - remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1605,82 +1605,6 @@ def _run(self, i): if r['return'] > 0: return r - # Prepare common input to prepare and run script - run_script_input = { - 'path': path, - 'bat_ext': bat_ext, - 'os_info': os_info, - 'const': const, - 'state': state, - 'const_state': const_state, - 'reuse_cached': reuse_cached, - 'recursion': recursion, - 'recursion_spaces': recursion_spaces, - 'remembered_selections': remembered_selections, - 'tmp_file_run_state': self.tmp_file_run_state, - 'tmp_file_run_env': self.tmp_file_run_env, - 'tmp_file_state': self.tmp_file_state, - 'tmp_file_run': self.tmp_file_run, - 'local_env_keys': self.local_env_keys, - 'local_env_keys_from_meta': local_env_keys_from_meta, - 'posthook_deps': posthook_deps, - 'add_deps_recursive': add_deps_recursive, - 'remembered_selections': remembered_selections, - 'found_script_tags': found_script_tags, - 'variation_tags_string': variation_tags_string, - 'found_cached': False, - 'debug_script_tags': debug_script_tags, - 'verbose': verbose, - 'meta': meta, - 'self': self - } - - # Check if pre-process and detect - if str(meta.get('predeps', 'True')).lower() not in ["0", "false", "no"] and os.path.isfile( - path_to_customize_py): # possible duplicate execution - needs fix - r = utils.load_python_module( - {'path': path, 'name': 'customize'}) - if r['return'] > 0: - return r - - customize_code = r['code'] - - customize_common_input = { - 'input': i, - 'automation': self, - 'artifact': script_artifact, - 'customize': script_artifact.meta.get('customize', {}), - 'os_info': os_info, - 'recursion_spaces': recursion_spaces, - 'script_tags': script_tags, - 'variation_tags': variation_tags - } - run_script_input['customize_code'] = customize_code - 
run_script_input['customize_common_input'] = customize_common_input - - if repro_prefix != '': - run_script_input['repro_prefix'] = repro_prefix - if ignore_script_error: - run_script_input['ignore_script_error'] = True - if 'predeps' in dir(customize_code) and not fake_run: - - logging.debug( - recursion_spaces + - ' - Running preprocess ...') - - run_script_input['run_state'] = run_state - - ii = copy.deepcopy(customize_common_input) - ii['env'] = env - ii['state'] = state - ii['meta'] = meta - # may need to detect versions in multiple paths - ii['run_script_input'] = run_script_input - - r = customize_code.predeps(ii) - if r['return'] > 0: - return r - # Check chain of dependencies on other CM scripts if len(deps) > 0: logging.debug(recursion_spaces + @@ -1702,8 +1626,6 @@ def _run(self, i): # Clean some output files clean_tmp_files(clean_files, recursion_spaces) - # Repeated code - ''' # Prepare common input to prepare and run script run_script_input = { 'path': path, @@ -1733,7 +1655,6 @@ def _run(self, i): 'meta': meta, 'self': self } - ''' if os.path.isfile( path_to_customize_py): # possible duplicate execution - needs fix r = utils.load_python_module( diff --git a/challenge/add-derived-metrics-to-mlperf-inference/README.md b/challenge/add-derived-metrics-to-mlperf-inference/README.md new file mode 100644 index 0000000000..8302f63d68 --- /dev/null +++ b/challenge/add-derived-metrics-to-mlperf-inference/README.md @@ -0,0 +1,32 @@ +### Challenge + +Check past MLPerf inference results in [this MLCommons repository](https://github.com/mlcommons/cm4mlperf-results) +and add derived metrics such as result/No of cores, power efficiency, device cost, operational costs, etc. + +Add clock speed as a third dimension to graphs and improve Bar graph visualization. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
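
For illustration, a derived metric is simply a new field computed from the raw result fields. Below is a minimal Python sketch of the idea; the field names (`Result`, `Result_Power`, `cpu_core_count`) are hypothetical placeholders and may not match the actual CM experiment schema.

```python
# Hypothetical sketch: computing derived metrics from one MLPerf inference result record.
# Field names are placeholders and may differ from the real CM experiment format.

result = {
    "Result": 12000.0,       # e.g. Offline throughput in samples/sec
    "Result_Power": 350.0,   # average system power in Watts (power-measured runs only)
    "cpu_core_count": 64,    # system descriptor field
}

derived = {
    # throughput normalized by the number of CPU cores
    "result_per_core": result["Result"] / result["cpu_core_count"],
    # throughput per Watt as a simple power-efficiency proxy
    "power_efficiency": result["Result"] / result["Result_Power"],
}

print(derived)
```

Such computed fields could then be attached to the records in the cm4mlperf-results repository and visualized in the Collective Knowledge explorer alongside the official metrics.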
diff --git a/challenge/add-derived-metrics-to-mlperf-inference/_cm.json b/challenge/add-derived-metrics-to-mlperf-inference/_cm.json new file mode 100644 index 0000000000..cbdc212467 --- /dev/null +++ b/challenge/add-derived-metrics-to-mlperf-inference/_cm.json @@ -0,0 +1,22 @@ +{ + "alias": "add-derived-metrics-to-mlperf-inference", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close_extension": true, + "date_open": "20240204", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "derived-metrics", + "mlperf-inference", + "mlperf-inference-derived-metrics" + ], + "title": "Add derived metrics to MLPerf inference benchmarks (power efficiency, results / No of cores, costs, etc)", + "trophies": true, + "uid": "c65b56d7770946ee" +} diff --git a/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md new file mode 100644 index 0000000000..a2059c0fe8 --- /dev/null +++ b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md @@ -0,0 +1,4 @@ +20240220: +* A prototype of a GUI to generate CM commands to run MLPerf inference benchmarks is ready: [link](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725) +* A prototype of the infrastructure to reproduce MLPerf inference benchmark results is ready: [link](https://access.cknowledge.org/playground/?action=reproduce) +* On-going efforts: https://github.com/mlcommons/ck/issues/1052 diff --git a/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml new file mode 100644 index 0000000000..b8b519d27f --- /dev/null +++ b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml @@ -0,0 +1,21 @@ +alias: automate-mlperf-inference-v3.1-and-v4.0-2024 +uid: f89f152fc2614240 + +automation_alias: challenge +automation_uid: 3d84abd768f34e08 + +title: Add MLCommons CM workflows and unified interface to automate MLPerf inference v3.1 and v4.0 benchmarks (Intel, Nvidia, Qualcomm, Arm64, TPU ...) + +date_open: '20231215' +date_close: '20240315' + +hot: true + +tags: +- automate +- mlperf-inference-v3.1-and-v4.0 +- 2024 + +experiments: +- tags: mlperf-inference,v3.1 +- tags: mlperf-inference,v4.0 diff --git a/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md new file mode 100644 index 0000000000..adfbea7263 --- /dev/null +++ b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md @@ -0,0 +1,10 @@ +This challenge is under preparation. You can read about the motivation behind this challenge in our [invited talk at MLPerf-Bench @ HPCA'24](https://doi.org/10.5281/zenodo.10786893). + +We plan to extend the [MLCommons CM framework](https://github.com/mlcommons/ck) +to automatically compose high-performance and cost-efficient AI systems +based on MLPerf inference v4.0 results and [CM automation recipes](https://access.cknowledge.org/playground/?action=scripts). 
+ +* A prototype of a GUI to generate CM commands to run MLPerf inference benchmarks is ready: [link](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725) +* A prototype of the infrastructure to reproduce MLPerf inference benchmark results is ready: [link](https://access.cknowledge.org/playground/?action=reproduce) + +Contact the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) for more details. diff --git a/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml new file mode 100644 index 0000000000..b1d4fe9f18 --- /dev/null +++ b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml @@ -0,0 +1,25 @@ +alias: compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024 +uid: 7c983102d89e4869 + +automation_alias: challenge +automation_uid: 3d84abd768f34e08 + +title: "Compose high-performance and cost-efficient AI systems using MLCommons' Collective Mind and MLPerf inference" + +date_open: '20240101' + +tags: +- compose +- ai +- systems +- mlperf-inference-v4.0 +- cm +- mlcommons-cm +- mlperf +- v4.0 +- performance +- energy +- cost + +experiments: +- tags: mlperf-inference,v4.0 diff --git a/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md new file mode 100644 index 0000000000..306341271c --- /dev/null +++ b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md @@ -0,0 +1,30 @@ +### Challenge + +Connect CM workflows to run MLPerf inference benchmarks with [OpenBenchmarking.org](https://openbenchmarking.org). + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* + + + +### Organizers + +* Michael Larabel +* Grigori Fursin +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +Results will be available at [OpenBenchmarking.org](https://openbenchmarking.org) +and the [MLCommons CK playground](https://access.cknowledge.org/playground/?action=experiments). 
diff --git a/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json new file mode 100644 index 0000000000..c1e65aadbd --- /dev/null +++ b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json @@ -0,0 +1,22 @@ +{ + "alias": "connect-mlperf-inference-v3.1-with-openbenchmarking", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_open": "20240101", + "date_close_extension": true, + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "openbenchmarking", + "mlperf-inference", + "mlperf-inference-openbenchmarking" + ], + "title": "Run MLPerf inference benchmarks using CM via OpenBenchmarking.org", + "trophies": true, + "uid": "534592626eb44efe" +} diff --git a/challenge/connect-mlperf-with-medperf/README.md b/challenge/connect-mlperf-with-medperf/README.md new file mode 100644 index 0000000000..f2f572bd48 --- /dev/null +++ b/challenge/connect-mlperf-with-medperf/README.md @@ -0,0 +1,23 @@ +### Challenge + +Evaluate models from [MLCommons MedPerf platform](https://www.medperf.org) in terms of latency, throughput, power consumption and other metrics +using MLPerf loadgen and MLCommons CM automation language. + +See the [Nature 2023 article about MedPerf](https://www.nature.com/articles/s42256-023-00652-2) +and [ACM REP'23 keynote about CM](https://doi.org/10.5281/zenodo.8105339) to learn more about these projects. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* + + +### Organizers + +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) diff --git a/challenge/connect-mlperf-with-medperf/_cm.json b/challenge/connect-mlperf-with-medperf/_cm.json new file mode 100644 index 0000000000..d48d0a9fea --- /dev/null +++ b/challenge/connect-mlperf-with-medperf/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "connect-mlperf-with-medperf", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close_extension": true, + "date_open": "20240105", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "medperf", + "mlperf-inference", + "mlperf-inference-medperf", + "mlperf-inference-medperf", + "mlperf-inference-medperf-v3.1", + "mlperf-inference-medperf-v3.1-2023", + "v3.1" + ], + "title": "Connect MedPerf with MLPerf and CM", + "trophies": true, + "uid": "c26d1fbf89164728" +} diff --git a/challenge/optimize-mlperf-inference-scc2023/README.md b/challenge/optimize-mlperf-inference-scc2023/README.md new file mode 100644 index 0000000000..62a4826ad2 --- /dev/null +++ b/challenge/optimize-mlperf-inference-scc2023/README.md @@ -0,0 +1,16 @@ +### CM tutorial + +https://github.com/mlcommons/ck/blob/master/docs/tutorials/scc23-mlperf-inference-bert.md + +### Challenge + +Reproduce and optimize MLPerf inference benchmarks during Student Cluster Competition at SuperComputing'23. 
+ +See our [related challenge from 2022](https://access.cknowledge.org/playground/?action=challenges&name=repro-mlperf-inference-retinanet-scc2022). + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge.org](https://cKnowledge.org) + diff --git a/challenge/optimize-mlperf-inference-scc2023/_cm.json b/challenge/optimize-mlperf-inference-scc2023/_cm.json new file mode 100644 index 0000000000..021872b15a --- /dev/null +++ b/challenge/optimize-mlperf-inference-scc2023/_cm.json @@ -0,0 +1,20 @@ +{ + "alias": "optimize-mlperf-inference-scc2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20231115", + "date_open": "20230915", + "tags": [ + "automate", + "modularize", + "reproduce", + "replicate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-scc", + "mlperf-inference-scc-2023" + ], + "title": "Reproduce and optimize MLPerf inference v3.1 benchmarks at the Student Cluster Competition'23 at SuperComputing'23 using CM", + "uid": "ddaf594f84b14bc2" +} diff --git a/challenge/optimize-mlperf-inference-scc2024/README.md b/challenge/optimize-mlperf-inference-scc2024/README.md new file mode 100644 index 0000000000..1f9be23af2 --- /dev/null +++ b/challenge/optimize-mlperf-inference-scc2024/README.md @@ -0,0 +1,7 @@ +The [MLCommons](https://mlcommons.org), [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org) +are preparing a unified interface to run the MLPerf inference benchmark at the Student Cluster Competition'24. + +See [the CM-MLPerf tutorial for SCC'23](https://github.com/mlcommons/ck/blob/master/docs/tutorials/scc23-mlperf-inference-bert.md). +Note that the MLPerf model will change in SCC'24 - please stay tuned for more details! + +See https://sc24.supercomputing.org/students/student-cluster-competition for more details about SCC. diff --git a/challenge/optimize-mlperf-inference-scc2024/_cm.json b/challenge/optimize-mlperf-inference-scc2024/_cm.json new file mode 100644 index 0000000000..ab75aa27a6 --- /dev/null +++ b/challenge/optimize-mlperf-inference-scc2024/_cm.json @@ -0,0 +1,19 @@ +{ + "alias": "optimize-mlperf-inference-scc2024", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_open": "20241001", + "tags": [ + "automate", + "modularize", + "reproduce", + "replicate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-scc", + "mlperf-inference-scc-2024" + ], + "title": "Run and optimize the MLPerf inference benchmark using CM at the Student Cluster Competition'24 at SuperComputing'24", + "uid": "f7fcba4c43ab4412" +} diff --git a/challenge/optimize-mlperf-inference-v2.1-2022/README.md b/challenge/optimize-mlperf-inference-v2.1-2022/README.md new file mode 100644 index 0000000000..d0ac7cf15b --- /dev/null +++ b/challenge/optimize-mlperf-inference-v2.1-2022/README.md @@ -0,0 +1,18 @@ +### Challenge + +Prepare, optimize and reproduce MLPerf inference v2.1 benchmarks across diverse implementations, software and hardware +using the [MLCommons CK framework](https://github.com/mlcommons/ck). + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [OctoML](https://octoml.ai) + +### Status + +This challenge has been successfully completed. 
+ +### Results + +Results are available [here](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,v2.1). diff --git a/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json b/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json new file mode 100644 index 0000000000..31cb5dffd2 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v2.1-2022", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20220901", + "date_open": "20220701", + "experiments": [ + { + "tags": "mlperf-inference,v2.1" + } + ], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-v2.1", + "mlperf-inference-v2.1-2022", + "v2.1" + ], + "title": "Run and optimize MLPerf inference v2.1 benchmarks", + "uid": "2e13154b7fbb412d" +} diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/README.md b/challenge/optimize-mlperf-inference-v3.0-2023/README.md new file mode 100644 index 0000000000..da6decc8c7 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/README.md @@ -0,0 +1,74 @@ +### Challenge + +Run MLPerf inference v3.0 benchmarks out-of-the-box across diverse implementations, software and hardware +using the [MLCommons CM automation language](https://github.com/mlcommons/ck) +and submit public results to the MLPerf inference v3.0 via [cTuning foundation](https://cTuning.org). + +* [GUI to run MLPerf inference benchmarks](https://cknowledge.org/mlperf-inference-gui) +* [GUI to prepare MLPerf inference submissions](https://cknowledge.org/mlperf-inference-submission-gui) + +### Organizers + +* [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge](https://cKnowledge.org) + +### Status + +This challenge has been successfully completed. + +### Results + +Official results: +* https://github.com/mlcommons/inference_results_v3.0/tree/main/closed/cTuning +* https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning + +Results in the MLCommons CK/CM format: +* https://github.com/mlcommons/cm4mlperf-results + +Visualization and comparison with derived metrics: +* [MLCommons Collective Knowledge Playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,v3.0). + +### The outcome + +We are very pleased to announce the successful outcome of the 1st +community challenge to run, reproduce and optimize MLPerf inference v3.0 +benchmarks: our MLCommons CK/CM workflow automation framework has helped +to prepare more than 80% of all submission results including 98% of power +results with very diverse technology and benchmark implementations from +Neural Magic, Qualcomm, cKnowledge Ltd, KRAI, cTuning foundation, Dell +Technologies, Hewlett Packard Enterprise, Lenovo, Hugging Face, NVIDIA, +Intel Corporation, AMD and Apple across diverse CPUs, GPUs and DSPs with +PyTorch, ONNX, QAIC, TF/TFLite, TVM and TensorRT using popular cloud +providers (GCP, AWS, Azure) and individual servers and edge devices +provided by our [volunteers](https://access.cknowledge.org/playground/?action=contributors). 
+ +You can now see and compare all MLPerf inference results v3.0, v2.1 and +v2.0 online together with reproducibility reports including the +[MLPerf BERT model](https://huggingface.co/ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1) +from the [Hugging Face Zoo](https://www.linkedin.com/company/huggingface/?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D) +on [Nvidia Jetson Orin platform](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md#reproducing-the-nvidia-jetson-agx-orin-submission). +You can even create your own derived metrics (such as performance per Watt), +provide your own constraints using this [MLCommons repository](https://github.com/mlcommons/cm_inference_results) and visualize +them as shown in [this example](https://access.cknowledge.org/playground/?action=experiments&name=e472410ee67c41f9&x=Result&y=Power_Efficiency&filter=result[%27Result_Power%27]%3C35&derived_metrics=result%5B%27Power_Efficiency%27%5D%3D1000%2Fresult%5B%27Result_Power%27%5D&c=accelerator_model_name&axis_key_s=version). + +Additional thanks to [Michael Goin](https://www.linkedin.com/in/michael-goin) +from [Neural Magic](https://www.linkedin.com/company/neural-magic/?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), our international +students including [Himanshu Dutta](https://www.linkedin.com/in/ACoAACpPCiMB7zUNStsqBmaOCtd100a7wXBGu_M?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), +[Aditya Kumar Shaw](https://www.linkedin.com/in/ACoAACJ3ikUBjuHqi35ibm8CG6IEYv-v_VsobIs?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), +Sachin Mudaliyar, [Thomas Zhu](https://www.linkedin.com/in/hanwen-zhu-483614189), +and all [CK/CM users and contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) for helping us to +validate, use and improve this open-source technology to automate +benchmarking and optimization of AI/ML systems in terms of performance, +accuracy, power and costs! We are also grateful to [HiPEAC](https://www.linkedin.com/company/hipeac) +and [OctoML](https://www.linkedin.com/company/octoml) for +sponsoring initial development and Peter Mattson, David Kanter, Vijay +Janapa Reddi and Alexandros Karargyris for fruitful discussions. 
+ + +### Dissemination + +* [Forbes article](https://www.forbes.com/sites/karlfreund/2023/04/05/nvidia-performance-trounces-all-competitors-who-have-the-guts-to-submit-to-mlperf-inference-30/?sh=3c38d2866676) +* [ZDNet article](https://www.zdnet.com/article/nvidia-dell-qualcomm-speed-up-ai-results-in-latest-benchmark-tests) +* [LinkedIn article from Grigori Fursin (MLCommons Task Force co-chair)]( https://www.linkedin.com/pulse/announcing-my-new-project-reproducible-optimization-co-design-fursin ) +* [Linkedin article from Arjun Suresh (MLCommons Task Force co-chair)](https://www.linkedin.com/posts/arjunsuresh_nvidia-performance-trounces-all-competitors-activity-7049500972275929088-nnnx?utm_source=share&utm_medium=member_desktop) diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json new file mode 100644 index 0000000000..0baf3cfeea --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.0-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230301", + "date_open": "20230201", + "experiments": [ + { + "tags": "mlperf-inference,v3.0" + } + ], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-v3.0", + "mlperf-inference-v3.0-2023", + "v3.0" + ], + "title": "Run and optimize MLPerf inference v3.0 benchmarks", + "uid": "57cbc3384d7640f9" +} diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md new file mode 100644 index 0000000000..f6a17979ca --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md @@ -0,0 +1,281 @@ +# Crowd-benchmarking MLPerf BERT inference + +
+Click here to see the table of contents. + +* [Crowd-benchmarking MLPerf BERT inference](#crowd-benchmarking-mlperf-bert-inference) +* [System preparation](#system-preparation) + * [Minimal system requirements](#minimal-system-requirements) + * [Install CM (CK2) automation meta-framework](#install-cm-ck2-automation-meta-framework) + * [Pull CM repository with portable automation recipes](#pull-cm-repository-with-portable-automation-recipes) + * [Detect or install CUDA](#detect-or-install-cuda) + * [Test CUDA installation](#test-cuda-installation) + * [Install Python virtual environment](#install-python-virtual-environment) + * [Detect or install cuDNN](#detect-or-install-cudnn) + * [Detect or install TensorRT](#detect-or-install-tensorrt) + * [Run MLPerf inference benchmark with BERT](#run-mlperf-inference-benchmark-with-bert) + * [Try ONNX runtime backend](#try-onnx-runtime-backend) + * [Do a test run to detect and record the system performance](#do-a-test-run-to-detect-and-record-the-system-performance) + * [Do a full accuracy run for all the scenarios](#do-a-full-accuracy-run-for-all-the-scenarios) + * [Do a full performance run for all the scenarios](#do-a-full-performance-run-for-all-the-scenarios) + * [Populate the README files](#populate-the-readme-files) + * [Generate MLPerf submission tree](#generate-mlperf-submission-tree) + * [Push the results to GitHub repo](#push-the-results-to-github-repo) + * [Try PyTorch backend](#try-pytorch-backend) + * [Test composable ML benchmark with other models, data sets, frameworks and platforms](#test-composable-ml-benchmark-with-other-models-data-sets-frameworks-and-platforms) +* [The next steps](#the-next-steps) + +
+ + +This is a pilot community project to collaboratively run MLPerf BERT inference benchmark +across diverse platforms provided by volunteers similar to [SETI@home](https://setiathome.berkeley.edu/). +However, instead of searching for extraterrestrial intelligence, we are +searching for optimal software/hardware combination to run various AI and ML workloads +in terms of performance, accuracy, power and costs ... + +This benchmark is composed from [portable and reusable automation recipes](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +developed by [MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) +to modularize complex AI and ML Systems and automate their benchmarking, design space exploration, optimization and deployment +across continuously evolving software, hardware, models and data. + +*If you submit your results before 1pm PST on Friday 3rd, 2023, + they will be accepted for the official MLPerf inference v3.0 submission round + and your name acknowledged in the notes!* + + +# System preparation + +## Minimal system requirements + +* CPU: any x86-64 or Arm64 based machine +* GPU: any relatively modern Nvidia GPU with 8GB+ memory and CUDA 11.4+ +* OS: we have tested this automation on Ubuntu 20.04, Ubuntu 22.04 and Debian 10 +* Disk space: ~10GB +* Python: 3.8+ +* All other dependencies (artifacts and tools) will be installed by the CM meta-framework aka (CK2) + +## Install CM (CK2) automation meta-framework + +Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install the [MLCommons CM framework](https://github.com/mlcommons/ck) +(the 2nd generation on the Collective Mind framework) on your system. + +## Pull CM repository with portable automation recipes + +Pull MLCommons CM repository with [cross-platform CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +supporting portable MLOps and DevOps: + +```bash +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +CM pulls all such repositories into the `$HOME/CM` directory to search for portable CM automation recipes and artifacts. + +We use the unified CM CLI & Python API of [portable and reusable CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) +to compose portable automation pipelines (also implemented as CM scripts) that can automatically detect or install all necessary artifacts (tools, models, datasets, libraries, etc) +required to run a given software project such as the MLPerf inference benchmark. + +These CM scripts simply wrap existing native scripts and tools as simple micro-services +with a human-readable CLI and simple Python API to be able to easily connect them together +and run on any platform in a unified way. + +## Detect or install CUDA + +Run the following CM script: +```bash +cm run script "get cuda" --out=json +``` + +If CUDA is automatically detected, it will be registered in the CM cache: +```bash +cm show cache --tags=get,cuda +``` + +Otherwise, this script will attempt to download and install the latest CUDA +from Nvidia website. + +Please report any issue with CM scripts [here](https://github.com/mlcommons/ck/issues). + +### Test CUDA installation + +You can test if CUDA toolkit and driver was detected or installed successfully using the following command: +```bash +cm run script "get cuda-devices" +``` + +You should see similar output: +```txt +Checking compiler version ... 
+ +nvcc: NVIDIA (R) Cuda compiler driver +Copyright (c) 2005-2022 NVIDIA Corporation +Built on Wed_Sep_21_10:33:58_PDT_2022 +Cuda compilation tools, release 11.8, V11.8.89 +Build cuda_11.8.r11.8/compiler.31833905_0 + +Compiling program ... + +Running program ... + + - Running postprocess ... +GPU Device ID: 0 +GPU Name: Tesla K80 +GPU compute capability: 3.7 +CUDA driver version: 11.4 +CUDA runtime version: 11.8 +Global memory: 11997020160 +Max clock rate: 823.500000 MHz +Total amount of shared memory per block: 49152 +Total number of registers available per block: 65536 +Warp size: 32 +Maximum number of threads per multiprocessor: 2048 +Maximum number of threads per block: 1024 +Max dimension size of a thread block X: 1024 +Max dimension size of a thread block Y: 1024 +Max dimension size of a thread block Z: 64 +Max dimension size of a grid size X: 2147483647 +Max dimension size of a grid size Y: 65535 +Max dimension size of a grid size Z: 65535 + + - running time of script "get,cuda-devices": 4.16 sec. + +``` + +## Install Python virtual environment + +```bash +cm run script "get sys-utils-cm" --quiet + +cm run script "install python-venv" --name=mlperf-cuda +``` + +If you want to install specific version of Python use the following command: +```bash +cm run script "install python-venv" --version=3.10.8 --name=mlperf-cuda +``` + +## Detect or install cuDNN + +```bash +cm run script "get cudnn" +``` + +If cuDNN is not detected on your system, you can download a TAR file +from [Nvidia website](https://developer.nvidia.com/cudnn) and then use the same CM script +to install it as follows: +```bash +cm run script "get cudnn" --tar_file= +``` + +We have tested this project with the following tar file `cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz`. + +## Detect or install TensorRT + +```bash +cm run script "get tensorrt" +``` +If TensorRT is not detected on your system, you can download a TAR file +from [Nvidia website](https://developer.nvidia.com/tensorrt) and then use the same CM script +to install it as follows: +```bash +cm run script "get tensorrt" --tar_file= +``` + +We have tested this project with the following tar file `TensorRT-8.5.1.7.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz`. 
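
All of the `cm run script ...` commands above can also be driven from Python through the CM API. The snippet below is a rough sketch rather than a verified recipe: it assumes the `cmind` package installed together with CM exposes the `access` function with the documented dictionary interface.

```python
# Rough sketch: invoking a CM script from Python instead of the command line.
# Assumes the `cmind` package (installed together with CM) provides the `access` API.
import cmind

# Roughly equivalent to: cm run script "get cuda-devices"
r = cmind.access({
    "action": "run",
    "automation": "script",
    "tags": "get,cuda-devices",
    "out": "con",  # print script output to the console
})

if r["return"] > 0:
    raise RuntimeError(r.get("error", "CM script failed"))
```

The same pattern should apply to the cuDNN and TensorRT detection scripts above by changing the `tags` value.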
+ + +## Run MLPerf inference benchmark with BERT + +### Try ONNX runtime backend + +#### Do a test run to detect and record the system performance + +```bash +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --implementation=reference \ + --device=cuda --backend=onnxruntime --quiet +``` + +#### Do a full accuracy run for all the scenarios + +```bash +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Do a full performance run for all the scenarios + +```bash +cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Populate the README files + +```bash +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=onnxruntime --quiet \ + --execution-mode=valid --results_dir=$HOME/inference_3.0_results +``` + +#### Generate MLPerf submission tree + +We should use the master branch of MLCommons inference repo for the submission checker. +You can use `--hw_note_extra` option to add your name to the notes. + +```bash +cm run script --tags=generate,inference,submission \ + --results_dir=$HOME/inference_3.0_results/valid_results \ + --adr.python.name=mlperf-cuda \ + --device=cuda --submission_dir=$HOME/inference_submission_tree --clean \ + --run-checker --submitter=cTuning --adr.inference-src.version=master + --hw_notes_extra="Result taken by " --quiet +``` + +#### Push the results to GitHub repo + +First create a fork of [this GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0). +Then run the following command after replacing `--repo_url` with your fork URL. + +```bash +cm run script --tags=push,github,mlperf,inference,submission \ + --submission_dir=$HOME/inference_submission_tree \ + --adr.python.name=mlperf-cuda \ + --repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0 \ + --commit_message="Bert crowd-results added" +``` + +Create a PR to the [GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0/) + + + +### Try PyTorch backend + +You can run the same commands with PyTorch by rerunning all above commands and replacing `--backend=onnxruntime` with `--backend=pytorch`. 
+ +For example, + +```bash +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ + --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ + --implementation=reference --backend=pytorch --execution-mode=valid \ + --results_dir=$HOME/inference_3.0_results --quiet +``` + + +## Test composable ML benchmark with other models, data sets, frameworks and platforms + +* [GUI to prepare CM command line and run benchmark](https://cknowledge.org/mlperf-inference-gui) +* [GUI to compare performance, accuracy, power and costs of ML/SW/HW combinations](https://cKnowledge.org/cm-gui-graph) + + +# The next steps + +Please follow the [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org) +and [MLCommons](https://mlcommons.org). + diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md new file mode 100644 index 0000000000..38f69a5d53 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md @@ -0,0 +1,59 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. +Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + +3d-unet has two variants - `3d-unet-99` and `3d-unet-99.9` where the `99` and `99.9` specify the required accuracy constraint with respect to the reference floating point model. Both models can be submitted under the edge as well as the datacenter category. + +Since 3d-unet is one of the slowest-running models, we only run it using the Nvidia implementation, where the model is quantized and runs on the TensorRT backend on an Nvidia GPU. + +For `3d-unet-99.9` runs, simply replace `3d-unet-99` with `3d-unet-99.9`. + +### TensorRT backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=3d-unet-99 --implementation=nvidia-original --device=cuda --backend=tensorrt \ +--category=edge --division=open --quiet +``` +* Use `--category=datacenter` to run datacenter scenarios +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md new file mode 100644 index 0000000000..8aebb068f0 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md @@ -0,0 +1,80 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. +Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + +Bert has two variants - `bert-99` and `bert-99.9` where the `99` and `99.9` specifies the required accuracy constraint with respect to the reference floating point model. `bert-99.9` model is applicable only on a datacenter system. + +On edge category `bert-99` has Offline and SingleStream scenarios and in datacenter category both `bert-99` and `bert-99.9` have Offline and Server scenarios. The below commands are assuming an edge category system. 
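
As a reminder of what the `99`/`99.9` suffixes mean in practice, the sketch below computes the implied accuracy targets. It is purely illustrative; the reference FP32 F1 score of 90.874 for BERT on SQuAD v1.1 is an assumed value that should be checked against the official MLPerf inference rules.

```python
# Illustrative only: how the -99 / -99.9 suffixes translate into accuracy targets.

REFERENCE_F1 = 90.874  # assumed FP32 reference F1 for BERT on SQuAD v1.1 (verify in the MLPerf rules)

def accuracy_target(reference: float, fraction: float) -> float:
    """Minimum accuracy a submission must reach to qualify."""
    return reference * fraction

print(f"bert-99   : F1 >= {accuracy_target(REFERENCE_F1, 0.99):.3f}")
print(f"bert-99.9 : F1 >= {accuracy_target(REFERENCE_F1, 0.999):.3f}")
```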
+ +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=bert-99 --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Tensorflow backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=tf --execution-mode=valid \ +--results_dir=$HOME/inference_3.0_results --quiet +``` + +## Pytorch backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. 
For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=pytorch \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet +``` + diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md new file mode 100644 index 0000000000..6d6ba275fd --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md @@ -0,0 +1,82 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. +Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. + +On edge category ResNet50 has Offline, SingleStream and MultiStream scenarios and in datacenter category it has Offline and Server scenarios. The below commands are assuming an edge category system. + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=resnet50 --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios --model=resnet50 \ +--device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). 
After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--submission_dir=$HOME/inference_submission_tree --clean \ +--run-checker --submitter=cTuning --adr.inference-src.version=master \ +--hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Tensorflow backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=tf \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +## TVM backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tvm-onnx`. (Only `--device=cpu` is currently supported for TVM) For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=tvm-onnx \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md new file mode 100644 index 0000000000..4eedba9f31 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md @@ -0,0 +1,67 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. +Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=retinanet --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Pytorch backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=pytorch \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md new file mode 100644 index 0000000000..d7191c808d --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md @@ -0,0 +1,53 @@ +## Setup +Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. 
+Download the ck repo to get the CM script for MLPerf submission + +``` +cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 +``` + +## Run Commands + +### TensorRT backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=rnnt --implementation=nvidia-original --device=cuda --backend=tensorrt \ +--category=edge --division=open --quiet +``` +* Use `--category=datacenter` to run datacenter scenarios +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.0_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md new file mode 100644 index 0000000000..e1691c21ac --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md @@ -0,0 +1,48 @@ +The below instructions are for creating an AWS instance from the CLI. You can also create an instance via web and setup CM on it. + +## Prerequisites + +1. AWS Key, secret and token +2. 
`*.pem` ssh key file to be used to create the instance (public key from here will be copied to the `$HOME/.ssh/authorized_keys` file in the created instance) + +## Run Commands + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. + +### Update Access Details + +``` +cd $HOME/CM/repos/mlcommon@ck/cm-mlops/script/run-terraform/aws/ +cp credentials.example credentials.sh +``` +Update `credentials.sh` with your AWS Key, Secret and Token + +### Create an AWS Instance + + +``` +cm run script --tags=run,terraform,_m7g.xlarge,_storage_size.500,_ubuntu.2204,_us-west-2 \ +--cminit --key_file=$HOME/cmuser.pem +``` + +The above command will output the IP of the created instance which will be having CM setup already done. + +`_m7g.xlarge,_storage_size.500,_ubuntu.2204` variations can be changed to launch a different instance. Below are the variation combinations we used for MLPerf inference 3.0 submissions. + +* `_g4dn.xlarge` +* `_a1.2xlarge,_storage_size.130,_ubuntu.2204` +* `_c5.4xlarge,_storage_size.130,_ubuntu.2204` +* `_m7g.2xlarge,_storage_size.500,_ubuntu.2204` +* `_inf1.2xlarge,_storage_size.500,_amazon-linux-2-kernel.510` +* `_t2.medium,_storage_size.200,_rhel.9` + +### Copy the needed files from the local machine + +Copy the imagenet dataset to the created instance. For example, + +``` +rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134: +``` +For using [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia) tar files for cuDNN and TensorRT are needed to be downloaded locally from Nvidia website and copied to the AWS instance similar to the above command. + +Once all the required files are copied over, login to the instance and follow the individual benchmark instructions from the README files given [here](./) diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md new file mode 100644 index 0000000000..6bd16556a3 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md @@ -0,0 +1,35 @@ +The below instructions are for creating a Google Cloud instance from the CLI. You can also create an instance via web and setup CM on it. + +## Prerequisites + +Please follow the authentication instructions given [here](https://github.com/ctuning/mlcommons-ck/blob/master/cm-mlops/script/run-terraform/README-about.md). + + +## Run Commands + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. + + +### Create a GCP Instance + + +``` +cm run script --tags=run,terraform,_gcp,_n1-highmem.4,_gcp_project.mlperf-inference-tests --cminit +``` + +The above command will output the IP of the created instance which will be having CM setup already done. + +`_n1-highmem.4` variation can be changed to launch a different instance. 
Below are the variation combinations we used for MLPerf inference 3.0 submissions. + +* `_n1-standard.4` + +### Copy the needed files + +Copy the imagenet dataset to the created instance. For example, + +``` +rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134: +``` +For using [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia) tar files for cuDNN and TensorRT are needed to be downloaded locally from Nvidia website and copied to the AWS instance similar to the above command. + +Once all the required files are copied over, login to the instance and follow the individual benchmark instructions from the README files given [here](./) diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md new file mode 100644 index 0000000000..68db00ea0e --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md @@ -0,0 +1,53 @@ +## Setup +We used Nvidia Jetson AGX Orin developer kit with 32GB RAM and 64GB eMMC. We also connected a 500GB SSD disk via USB and Wifi connection for internet connectivity. + +We used the out of the box developer kit image which was running Ubuntu 20.04 and JetPack 5.0.1 Developer Preview (L4T 34.1.1) with CUDA 11.4. We were also using the default 4k page size (Nvidia recommends 64k for MLPerf inference). + +[cuDNN 8.6.0](https://developer.nvidia.com/compute/cudnn/secure/8.6.0/local_installers/11.8/cudnn-local-repo-ubuntu2004-8.6.0.163_1.0-1_arm64.deb) and [TensorRT 8.5.2.2](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.5.3/local_repos/nv-tensorrt-local-repo-ubuntu2004-8.5.3-cuda-11.8_1.0-1_arm64.deb) were downloaded as Debian packages on a host machine, copied over to Nvidia Jetson Orin and installed. + + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset. + +### Copy the needed files from a host machine + +Copy the imagenet dataset to the created instance. For example, + +``` +rsync -avz $HOME/imagenet-2012-val/ user@192.168.0.27: +``` + +Login to Orin and register the imagenet dataset as +``` +cm run script --tags=get,imagenet,dataset,_2012,_full --input=$HOME/imagenet-2012-val +``` + +Once all the required files are copied over, follow the individual benchmark instructions from the README files given [here](./) All the required dependencies should be resolved by CM. + +### Power Measurement Setup + +We were measuring power in the peak performance mode (MaxN) except for one SUT where the energy efficiency mode was changed to Max15. Our aim was to showcase the out of the box performance of Nvidia Jetson AGX Orin including the power usage. + +## Reproducing the Nvidia Jetson AGX Orin Submission + +After our submission we followed the instructions from Nvidia in the inference v3.0 repository and tried to reproduce the numbers from Nvidia. For MaxN mode we were able to match the numbers by Nvidia using same versions of CUDA, cuDNN and TensorRT but outside of docker. For MaxQ mode, we could get the same performance as Nvidia but our power usage was about 5W higher. 
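+
+Power modes on the AGX Orin are switched with `nvpmodel`. A minimal sketch of how the power mode can be queried and switched before a run is below; note that the numeric mode IDs depend on the JetPack/L4T release and on the profiles defined in `/etc/nvpmodel.conf`, so treat the `-m 0` value as an assumption and verify it on your device:
+
+```
+# Show the currently active power mode
+sudo nvpmodel -q
+
+# Switch to MaxN (mode 0 on the AGX Orin image we used - verify on your device)
+sudo nvpmodel -m 0
+
+# Lock the clocks to the maximum allowed by the selected power mode
+sudo jetson_clocks
+```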
+ +### Performance results MaxN + +The below table shows the performance comparison of our results under different settings and the Nvidia submission for MLPerf inference 3.0. We'll be updating our instructions for easier reproducibility of these numbers including CM scripts for flashing the L4T image and rebuilding the kernel for 64k pagesize. + + +| Workload | Results | L4T | PAGESIZE | Power Mode | FAN Dynamic Speed control | Offline Accuracy | Offline Performance | SingleStream Accuracy | SingleStream Performance | MultiStream Accuracy | MultiStream Performance | +| --------- | --------------------------------- | ----- | -------- | ---------- | ------------------------- | ---------------- | ------------------- | --------------------- | ------------------------ | -------------------- | ----------------------- | +| ResNet50 | Nvidia Submitted (docker) | r35.3 | 64k | MaxN | active | 75.934 | 6438.1 | 76.032 | 0.633479 | 76.032 | 2.187731 | +| ResNet50 | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 75.934 | 4697 | 76.032 | 0.72 | 76.032 | 2.57 | +| ResNet50 | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 75.85 | 6172 | 76.056 | 0.644 | 76.056 | 2.074 | +| ResNet50 | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 75.85 | 6430 | 76.056 | 0.659 | 76.056 | 2.20 | +| RetinaNet | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 37.372 | 92.4048 | 37.403 | 13.924457 | 37.519 | 104.680313 | +| RetinaNet | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 37.346 | 80.0854 (no DLA) | 37.350 | 14,19 | 37.409 | 105.344828 | +| RetinaNet | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 37.345 | 94.6886 | 37.340 | 14.073 | 37.488 | 103.8 | +| BERT | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 90.552 | 544.243 | 90.344 | 5.635431 | NA | NA | +| BERT | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 90.552 | 449.96 | 90.344 | 7.8 | NA | NA | +| BERT | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 90.562 | 527 (128 batchsize) | 90.311 | 6.636 | NA | NA | +| BERT | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 90.552 | 539 | 90.344 | 6.31 | NA | NA | + + diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-2023/README.md new file mode 100644 index 0000000000..6362f3eb66 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/README.md @@ -0,0 +1,83 @@ +### Introduction + +Our goal is to help the community benchmark and optimize various AI/ML applications +across diverse software and hardware provided by volunteers similar to SETI@home! + +Open-source [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) +were developed by a [consortium of 50+ companies and universities (MLCommons)](https://mlcommons.org) +to enable trustable and reproducible comparison of AI/ML systems +in terms of latency, throughput, power consumption, accuracy and other metrics +across diverse software/hardware stacks from different vendors. + +However, running MLPerf inference benchmarks and submitting results [turned out to be a challenge](https://doi.org/10.5281/zenodo.8144274) +even for experts and could easily take many weeks to prepare. 
That's why [MLCommons](https://mlcommons.org), +[cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +and [cKnowledge.org](https://www.linkedin.com/company/cknowledge) +decided to develop an open-source, technology-agnostic +and non-intrusive [Collective Mind automation language (CM)](https://github.com/mlcommons/ck) +and [Collective Knowledge Playground (CK)](https://access.cknowledge.org/playground/?action=experiments) +to help anyone run, reproduce, optimize and compare MLPerf inference benchmarks out-of-the-box +across diverse software, hardware, models and data sets. + +You can read more about our vision, open-source technology and future plans +in this [presentation](https://doi.org/10.5281/zenodo.8105339). + + + +### Advanced challenge + +We would like to ask volunteers run various MLPerf inference benchmarks +on diverse CPUs (Intel, AMD, Arm) and Nvidia GPUs similar to SETI@home +across different framework (ONNX, PyTorch, TF, TFLite) +either natively or in a cloud (AWS, Azure, GCP, Alibaba, Oracle, OVHcloud, ...) +and submit results to MLPerf inference v3.1. + +However, since some benchmarks may take 1..2 days to run, we suggest to start in the following order (these links describe CM commands to run benchmarks and submit results): +* [CPU: Reference implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_reference.md) +* [CPU: TFLite C++ implementation of Image classification with variations of MobileNets and EfficientNets (open division)](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/run-mlperf-inference-mobilenet-models/README-about.md) +* [Nvidia GPU: Nvidia optimized implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_nvidia.md) +* [Nvidia GPU: Nvidia optimized implementation of Language processing with BERT large (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/bert/README_nvidia.md) +* [Nvidia GPU: Reference implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/bert/README_nvidia.md) +* [Nvidia GPU: Reference implementation of Language processing with BERT large (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_reference.md) +* [Nvidia GPU (24GB of memory min): Reference implementation of Language processing with GPT-J 6B (open)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/gpt-j/README_reference.md) +* [Nvidia GPU: Nvidia optimized implementation of all other models (open and closed division)](https://github.com/ctuning/mlcommons-ck/blob/master/docs/mlperf/inference/README.md#run-benchmarks-and-submit-results) + +Please read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to set up and run above benchmarks using CM. + +You can register your participation for the [Collective Knowledge leaderboard]( https://access.cKnowledge.org/playground/?action=contributors ) +using this [guide](https://github.com/mlcommons/ck/blob/master/platform/register.md). 
+ +Please report encountered problems using [GitHub issues](https://github.com/mlcommons/ck/issues) +to help the community +improve the portability of the CM automation for MLPerf and other benchmarks and projects. + +Looking forward to your submissions and happy hacking! + + + +### Prizes + +* *All submitters will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All submitters will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Status + +You can see shared results in [this repostiory](https://github.com/ctuning/mlperf_inference_submissions_v3.1) +with PRs from participants [here](https://github.com/ctuning/mlperf_inference_submissions_v3.1/pulls). + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json new file mode 100644 index 0000000000..a30c26c928 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_open": "20230704", + "experiments": [], + "points": 1, + "sort": -10, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "mlperf", + "mlperf-inference", + "mlperf-inference-v3.1", + "mlperf-inference-v3.1-2023", + "v3.1" + ], + "title": "Crowd-benchmark all MLPerf inference benchmarks similar to SETI@home (latency, throughput, power consumption, accuracy, costs)", + "trophies": true, + "uid": "3e971d8089014d1f" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md new file mode 100644 index 0000000000..9806c22647 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md @@ -0,0 +1,67 @@ +## Setup + +Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) +to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. + +Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box +across different software, hardware, models and data sets: + + +``` +cm pull repo mlcommons@ck +``` + +Note that you can install Python virtual environment via CM to avoid contaminating +your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). 
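+
+For example, a dedicated virtual environment can be created through CM and then selected by the subsequent scripts. The script tags and the `--name`/`--adr.python.name` values below follow the linked README but are assumptions here - please double-check them against that page:
+
+```
+# Create a CM-managed Python virtual environment named "mlperf" (hypothetical name)
+cm run script --tags=install,python-venv --name=mlperf
+
+# Make the following CM commands pick up that environment
+export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf"
+```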
+
+## Run Commands
+
+3d-unet has two variants - `3d-unet-99` and `3d-unet-99.9`, where `99` and `99.9` specify the required accuracy constraint with respect to the reference floating-point model. Both models can be submitted under the edge as well as the datacenter category.
+
+Since 3d-unet is one of the slowest-running models, we only run it using the Nvidia implementation, where the model is quantized and run on the TensorRT backend on an Nvidia GPU.
+
+For `3d-unet-99.9` runs, simply replace `3d-unet-99` with `3d-unet-99.9`.
+
+### TensorRT backend
+
+#### Do a test run to detect and record the system performance
+
+```
+cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \
+--model=3d-unet-99 --implementation=nvidia-original --device=cuda --backend=tensorrt \
+--category=edge --division=open --quiet
+```
+* Use `--category=datacenter` to run datacenter scenarios
+* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode)
+
+#### Do full accuracy and performance runs for all the scenarios
+
+```
+cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \
+--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs
+* Use `--division=closed` to run all scenarios for the closed division including the compliance tests
+* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers
+
+#### Populate the README files
+```
+cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \
+--model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```
+
+#### Generate actual submission tree
+
+Here, we copy the performance and accuracy log files (and the compliance logs in the case of the closed division) from the results directory into a submission tree that follows the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, the [accuracy truncation script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate the accuracy logs, and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree.
+
+The master branch of the MLCommons inference repository should be used for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes.
+```
+cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \
+--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \
+--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet
+```
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md
new file mode 100644
index 0000000000..c43363c1e9
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md
@@ -0,0 +1,113 @@
+## Setup
+
+Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md)
+to install the MLCommons CM reproducibility and automation language in your native environment or Docker container.
+
+Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box
+across different software, hardware, models and data sets:
+
+
+```
+cm pull repo mlcommons@ck
+```
+
+Note that you can install a Python virtual environment via CM to avoid contaminating
+your local Python installation, as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments).
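+
+If you want the automation recipes pinned for later reproduction, the repository can be pulled at a fixed commit in the same way the MLPerf inference v3.0 documents in this repository do; the hash below is only a placeholder:
+
+```
+cm pull repo mlcommons@ck --checkout=<commit-hash>
+```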
This requires a power analyzer as described [here](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md) +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Tensorflow backend (Reference implementation) + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=tf --execution-mode=valid \ +--results_dir=$HOME/inference_3.1_results --quiet +``` + +## Pytorch backend (Reference implementation) + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. 
For example, + +``` +cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ +--model=bert-99 --device=cpu --implementation=reference --backend=pytorch \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results --quiet +``` + +## TensorRT backend (Nvidia implementation) + +For TensorRT backend we are using the [Nvidia implementation](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/reproduce-mlperf-inference-nvidia) and not the [MLPerf inference reference implementation](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-inference-reference) for the below reasons +* TensorRT backend is not supported by default in the reference implementation +* Reference implemnetation is mostly for fp32 models and quantization is not suppoted by default +* Nvidia has done some fantastic work in optimizing performance for TensorRT backend + +To get setup please follow the instructions [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/reproduce-mlperf-inference-nvidia/README-about.md) to download and install TensorRT and cuDNN unless you already have them installed. This readme also details how to handle the configuration files which are automatically generated by the Nvidia implementation scripts. Once this is done, the following command will run all the modes and scenarios. + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=bert-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs. This requires a power analyzer as described [here](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md) +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the default performance numbers +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* Use `--category=datacenter` to run datacenter scenarios + + +TensorRT backend has an engine generation stage which can be time consuming. For repeated runs `--adr.nvidia-harness.make_cmd=run_harness` option will avoid this engine regeneration and reuse the previously generated one. + + diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md new file mode 100644 index 0000000000..470930e373 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md @@ -0,0 +1,90 @@ +## Setup + +Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) +to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. 
+ +Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box +across different software, hardware, models and data sets: + + +``` +cm pull repo mlcommons@ck +``` + +Note that you can install Python virtual environment via CM to avoid contaminating +your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). + +## Run Commands + +We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. + +On edge category ResNet50 has Offline, SingleStream and MultiStream scenarios and in datacenter category it has Offline and Server scenarios. The below commands are assuming an edge category system. + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=resnet50 --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios --model=resnet50 \ +--device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. 
+``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ +--submission_dir=$HOME/inference_submission_tree --clean \ +--run-checker --submitter=cTuning --adr.inference-src.version=master \ +--hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Tensorflow backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=tf \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +## TVM backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tvm-onnx`. (Only `--device=cpu` is currently supported for TVM) For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=resnet50 --device=cpu --implementation=reference --backend=tvm-onnx \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md new file mode 100644 index 0000000000..4420462cde --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md @@ -0,0 +1,75 @@ +## Setup + +Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) +to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. + +Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box +across different software, hardware, models and data sets: + + +``` +cm pull repo mlcommons@ck +``` + +Note that you can install Python virtual environment via CM to avoid contaminating +your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). + +## Run Commands + + +### Onnxruntime backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=retinanet --implementation=reference --device=cpu --backend=onnxruntime \ +--category=edge --division=open --quiet +``` +* Use `--device=cuda` to run the inference on Nvidia GPU +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) +* Use `--category=datacenter` to run datacenter scenarios + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` + + +## Pytorch backend + +Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. For example, + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=retinanet --device=cpu --implementation=reference --backend=pytorch \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md new file mode 100644 index 0000000000..a6ca069215 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md @@ -0,0 +1,61 @@ +## Setup + +Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) +to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. + +Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box +across different software, hardware, models and data sets: + + +``` +cm pull repo mlcommons@ck +``` + +Note that you can install Python virtual environment via CM to avoid contaminating +your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). 
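+
+The commands in the next section use the `_all-modes,_all-scenarios` variations and therefore take a long time. For quicker iteration while debugging a setup, a single scenario can be targeted in performance-only mode; the `_performance-only` variation and the `--scenario` flag below are assumptions based on the `_accuracy-only` variation used elsewhere in these documents, so please verify them against the script documentation before relying on them:
+
+```
+cm run script --tags=generate-run-cmds,inference,_performance-only \
+--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \
+--scenario=Offline --execution-mode=valid --results_dir=$HOME/inference_3.1_results \
+--category=edge --division=open --quiet
+```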
+ +## Run Commands + +### TensorRT backend + +#### Do a test run to detect and record the system performance + +``` +cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ +--model=rnnt --implementation=nvidia-original --device=cuda --backend=tensorrt \ +--category=edge --division=open --quiet +``` +* Use `--category=datacenter` to run datacenter scenarios +* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) + +#### Do a full accuracy and performance runs for all the scenarios + +``` +cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ +--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs +* Use `--division=closed` to run all scenarios for the closed division including the compliance tests +* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +#### Populate the README files +``` +cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ +--model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ +--execution-mode=valid --results_dir=$HOME/inference_3.1_results \ +--category=edge --division=open --quiet +``` + +#### Generate actual submission tree + +Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. + +We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. +``` +cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ +--device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning +--adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet +``` diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md new file mode 100644 index 0000000000..152c612aad --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md @@ -0,0 +1,50 @@ +## Setup ASW instance for MLPerf + +The below instructions are for creating an AWS instance from the CLI. You can also create an instance via web and setup CM on it. + +## Prerequisites + +1. AWS Key, secret and token +2. 
`*.pem` SSH key file to be used to create the instance (the public key from here will be copied to the `$HOME/.ssh/authorized_keys` file in the created instance)
+
+## Run Commands
+
+We need the full ImageNet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+### Update Access Details
+
+```
+cd $HOME/CM/repos/mlcommons@ck/cm-mlops/script/run-terraform/aws/
+cp credentials.example credentials.sh
+```
+Update `credentials.sh` with your AWS Key, Secret and Token.
+
+### Create an AWS Instance
+
+
+```
+cm run script --tags=run,terraform,_m7g.xlarge,_storage_size.500,_ubuntu.2204,_us-west-2 \
+--cminit --key_file=$HOME/cmuser.pem
+```
+
+The above command outputs the IP of the created instance, which already has CM set up on it.
+
+The `_m7g.xlarge,_storage_size.500,_ubuntu.2204` variations can be changed to launch a different instance. Below are the variation combinations we used for MLPerf inference 3.0 submissions.
+
+* `_g4dn.xlarge`
+* `_a1.2xlarge,_storage_size.130,_ubuntu.2204`
+* `_c5.4xlarge,_storage_size.130,_ubuntu.2204`
+* `_m7g.2xlarge,_storage_size.500,_ubuntu.2204`
+* `_inf1.2xlarge,_storage_size.500,_amazon-linux-2-kernel.510`
+* `_t2.medium,_storage_size.200,_rhel.9`
+
+### Copy the needed files from the local machine
+
+Copy the imagenet dataset to the created instance. For example,
+
+```
+rsync -avz -e "ssh -i $HOME/cmuser.pem" $HOME/imagenet-2012-val/ ubuntu@54.189.93.134:
+```
+For the [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia), the cuDNN and TensorRT tar files need to be downloaded locally from the Nvidia website and copied to the AWS instance in a similar way.
+
+Once all the required files are copied over, log in to the instance and follow the individual benchmark instructions from the README files given [here](./).
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md
new file mode 100644
index 0000000000..a3a0e457a1
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md
@@ -0,0 +1,37 @@
+## Setup GCP instance for MLPerf
+
+The below instructions are for creating a Google Cloud instance from the CLI. You can also create an instance via the web console and set up CM on it.
+
+## Prerequisites
+
+Please follow the authentication instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/run-terraform/README-about.md).
+
+
+## Run Commands
+
+We need the full ImageNet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register it in CM.
+
+
+### Create a GCP Instance
+
+
+```
+cm run script --tags=run,terraform,_gcp,_n1-highmem.4,_gcp_project.mlperf-inference-tests --cminit
+```
+
+The above command outputs the IP of the created instance, which already has CM set up on it.
+
+The `_n1-highmem.4` variation can be changed to launch a different instance.
Below are the variation combinations we used for MLPerf inference 3.0 submissions.
+
+* `_n1-standard.4`
+
+### Copy the needed files
+
+Copy the imagenet dataset to the created instance. For example:
+
+```
+rsync -avz -e "ssh -i $HOME/cmuser.pem" $HOME/imagenet-2012-val/ ubuntu@54.189.93.134:
+```
+To use the [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia), the cuDNN and TensorRT tar files need to be downloaded locally from the Nvidia website and copied to the GCP instance in the same way.
+
+Once all the required files are copied over, log in to the instance and follow the individual benchmark instructions from the README files given [here](./).
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md
new file mode 100644
index 0000000000..08c0a8eeb0
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md
@@ -0,0 +1,54 @@
+## Setup
+
+We used an Nvidia Jetson AGX Orin developer kit with 32GB RAM and 64GB eMMC. We also connected a 500GB SSD disk via USB and used a WiFi connection for internet connectivity.
+
+We used the out-of-the-box developer kit image, which was running Ubuntu 20.04 and JetPack 5.0.1 Developer Preview (L4T 34.1.1) with CUDA 11.4. We also used the default 4k page size (Nvidia recommends 64k for MLPerf inference).
+
+[cuDNN 8.6.0](https://developer.nvidia.com/compute/cudnn/secure/8.6.0/local_installers/11.8/cudnn-local-repo-ubuntu2004-8.6.0.163_1.0-1_arm64.deb) and [TensorRT 8.5.2.2](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.5.3/local_repos/nv-tensorrt-local-repo-ubuntu2004-8.5.3-cuda-11.8_1.0-1_arm64.deb) were downloaded as Debian packages on a host machine, copied over to the Nvidia Jetson Orin and installed.
+
+
+We need the full ImageNet dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL, please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset.
+
+### Copy the needed files from a host machine
+
+Copy the imagenet dataset to the device. For example:
+
+```
+rsync -avz $HOME/imagenet-2012-val/ user@192.168.0.27:
+```
+
+Log in to the Orin and register the imagenet dataset:
+```
+cm run script --tags=get,imagenet,dataset,_2012,_full --input=$HOME/imagenet-2012-val
+```
+
+Once all the required files are copied over, follow the individual benchmark instructions from the README files given [here](./). All the required dependencies should be resolved by CM.
+
+### Power Measurement Setup
+
+We measured power in the peak performance mode (MaxN), except for one SUT where the energy efficiency mode was changed to Max15. Our aim was to showcase the out-of-the-box performance of the Nvidia Jetson AGX Orin, including power usage.
+
+## Reproducing the Nvidia Jetson AGX Orin Submission
+
+After our submission, we followed Nvidia's instructions in the inference v3.0 repository and tried to reproduce their numbers. For MaxN mode we were able to match Nvidia's numbers using the same versions of CUDA, cuDNN and TensorRT, but outside of Docker. For MaxQ mode, we could get the same performance as Nvidia, but our power usage was about 5W higher. 
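+
+For reference, a minimal sketch of how the power mode can be inspected and switched on a Jetson device before benchmarking (the mode IDs here are assumptions and vary across Jetson models and JetPack releases; check `/etc/nvpmodel.conf` on your board):
+
+```
+# Query the currently active power mode
+sudo nvpmodel -q
+
+# Switch to the peak-performance mode (MaxN is mode 0 on many Jetson AGX Orin images - verify in /etc/nvpmodel.conf)
+sudo nvpmodel -m 0
+
+# Optionally lock the clocks to their maximum for the selected mode
+sudo jetson_clocks
+```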
+
+### Performance results MaxN
+
+The table below compares our results under different settings with the Nvidia submission for MLPerf inference 3.0. We will be updating our instructions to make these numbers easier to reproduce, including CM scripts for flashing the L4T image and rebuilding the kernel with a 64k page size.
+
+
+| Workload | Results | L4T | PAGESIZE | Power Mode | FAN Dynamic Speed control | Offline Accuracy | Offline Performance | SingleStream Accuracy | SingleStream Performance | MultiStream Accuracy | MultiStream Performance |
+| --------- | --------------------------------- | ----- | -------- | ---------- | ------------------------- | ---------------- | ------------------- | --------------------- | ------------------------ | -------------------- | ----------------------- |
+| ResNet50 | Nvidia Submitted (docker) | r35.3 | 64k | MaxN | active | 75.934 | 6438.1 | 76.032 | 0.633479 | 76.032 | 2.187731 |
+| ResNet50 | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 75.934 | 4697 | 76.032 | 0.72 | 76.032 | 2.57 |
+| ResNet50 | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 75.85 | 6172 | 76.056 | 0.644 | 76.056 | 2.074 |
+| ResNet50 | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 75.85 | 6430 | 76.056 | 0.659 | 76.056 | 2.20 |
+| RetinaNet | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 37.372 | 92.4048 | 37.403 | 13.924457 | 37.519 | 104.680313 |
+| RetinaNet | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 37.346 | 80.0854 (no DLA) | 37.350 | 14.19 | 37.409 | 105.344828 |
+| RetinaNet | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 37.345 | 94.6886 | 37.340 | 14.073 | 37.488 | 103.8 |
+| BERT | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 90.552 | 544.243 | 90.344 | 5.635431 | NA | NA |
+| BERT | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 90.552 | 449.96 | 90.344 | 7.8 | NA | NA |
+| BERT | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 90.562 | 527 (128 batchsize) | 90.311 | 6.636 | NA | NA |
+| BERT | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 90.552 | 539 | 90.344 | 6.31 | NA | NA |
+
+
diff --git a/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md
new file mode 100644
index 0000000000..b72349ad59
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md
@@ -0,0 +1,31 @@
+### Challenge
+
+Develop a reference implementation of any MLPerf inference benchmark to run on Amazon Inferentia.
+Submit preliminary (unoptimized) benchmarking results to MLPerf inference v3.1 and beyond.
+
+Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md)
+to run reference implementations of MLPerf inference benchmarks
+using the CM automation language and use them as a base for your developments.
+
+Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision.
+
+
+### Prizes
+
+* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).*
+* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. 
+ + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). diff --git a/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json new file mode 100644 index 0000000000..66431963a5 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-amazon-inferentia-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "benchmark", + "automate", + "inferentia", + "mlperf-inference", + "mlperf-inference-inferentia", + "mlperf-inference-inferentia", + "mlperf-inference-inferentia-v3.1", + "mlperf-inference-inferentia-v3.1-2023", + "v3.1" + ], + "title": "Develop a reference implementation of any MLPerf inference benchmark to run on Amazon Inferentia and submit to MLPerf inference v3.1+", + "uid": "c8f2573320424e2a" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md new file mode 100644 index 0000000000..c08847da6a --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md @@ -0,0 +1,20 @@ +### Challenge + +Create any end-to-end AI application with web cam, speech recognition, chat bot, LLM +that uses any MLPerf model and CM automation. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +Looking forward to your submissions and happy hacking! 
+ +### Prizes + +* *All submitters will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All submitters will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* + + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) + diff --git a/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json new file mode 100644 index 0000000000..23fb64d835 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-create-end-to-end-app", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_open": "20230704", + "date_close_extension": true, + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "end-to-end-app", + "mlperf-inference", + "mlperf-inference-end-to-end-app", + "mlperf-inference-end-to-end-app", + "mlperf-inference-end-to-end-app-v3.1", + "mlperf-inference-end-to-end-app-v3.1-2023", + "v3.1" + ], + "title": "Generate end-to-end optimized AI apps (LLM, speech, etc) based on MLPerf inference results (with and without container)", + "uid": "96ca61a5aa914063" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md b/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md new file mode 100644 index 0000000000..f0f8908d29 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md @@ -0,0 +1,31 @@ +### Challenge + +Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 using +CM automation language with the DeepSparse library, any model and any platform. + +Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge Ltd](https://cKnowledge.org) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
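+
+For reference, the test-run command from the [run-mlperf@home-v3.1-cpu guide](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-cpu/run-cpu-bert-99-deepsparse.md) in this repository, which uses the DeepSparse backend with BERT-99, looks as follows (adjust the model, division and scenario for your own submission):
+
+```bash
+cm run script --tags=generate-run-cmds,inference,_find-performance \
+--model=bert-99 --implementation=reference --device=cpu --backend=deepsparse \
+--category=edge --division=open --quiet --scenario=Offline
+```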
diff --git a/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json b/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json new file mode 100644 index 0000000000..e1cc4f8880 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-deepsparse", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "experiments": [], + "points": 1, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "deepsparse", + "mlperf-inference", + "mlperf-inference-deepsparse", + "mlperf-inference-deepsparse", + "mlperf-inference-deepsparse-v3.1", + "mlperf-inference-deepsparse-v3.1-2023", + "v3.1" + ], + "title": "Run and optimize MLPerf inference v3.1 benchmarks with Neural Magic's DeepSparse library", + "trophies": true, + "uid": "c495863b08e74abc" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md new file mode 100644 index 0000000000..94fad05b51 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md @@ -0,0 +1,32 @@ +### Challenge + +Develop a reference implementation of any MLPerf inference benchmark to run on the latest publicly available Google TPU. +Submit preliminary (unoptimized) benchmarking results to MLPerf inference v3.1 and beyond. + +Note that you can use either GCP TPU or Coral TPU USB-Accelerator CPU card. +In the latter case, you can reuse and extend our CM-MLPerf script for MobileNets! + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json
new file mode 100644
index 0000000000..3d5aecc950
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json
@@ -0,0 +1,27 @@
+{
+  "alias": "optimize-mlperf-inference-v3.1-google-tpu-2023",
+  "automation_alias": "challenge",
+  "automation_uid": "3d84abd768f34e08",
+  "date_close": "20230817",
+  "date_open": "20230704",
+  "points":3,
+  "trophies":true,
+  "date_close_extension": true,
+  "tags": [
+    "modularize",
+    "optimize",
+    "reproduce",
+    "replicate",
+    "automate",
+    "benchmark",
+    "tpu",
+    "mlperf-inference",
+    "mlperf-inference-tpu",
+    "mlperf-inference-tpu",
+    "mlperf-inference-tpu-v3.1",
+    "mlperf-inference-tpu-v3.1-2023",
+    "v3.1"
+  ],
+  "title": "Develop a reference implementation of any MLPerf inference benchmark to run on the latest publicly available Google TPU (GCP or Coral USB accelerator) and submit to MLPerf inference v3.1+",
+  "uid": "5975fd0e18cd4073"
+}
diff --git a/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md
new file mode 100644
index 0000000000..014f83f7d9
--- /dev/null
+++ b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md
@@ -0,0 +1,52 @@
+### Introduction
+
+Open-source [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549)
+were developed by a [consortium of 50+ companies and universities (MLCommons)](https://mlcommons.org)
+to enable trustworthy and reproducible comparison of AI/ML systems
+in terms of latency, throughput, power consumption, accuracy and other metrics
+across diverse software/hardware stacks from different vendors.
+
+However, it is difficult to customize and run MLPerf benchmarks with non-reference models.
+
+That's why the MLCommons Task Force on automation and reproducibility has developed
+a [Collective Mind automation language](https://doi.org/10.5281/zenodo.8144274)
+to modularize these benchmarks and make them easier to run with different models and data sets.
+
+
+### Challenge
+
+Implement a CM workflow to connect any Hugging Face model
+to MLPerf loadgen and run it with random inputs to obtain preliminary latency and throughput numbers
+without measuring accuracy.
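+
+A minimal sketch of one possible flow, chaining the CM scripts listed under the resources below (the exact variation tags and flags here are assumptions - please check the individual script READMEs):
+
+```bash
+cm pull repo mlcommons@ck
+
+# Fetch an ONNX model from the Hugging Face model zoo (model stub taken from the resources below)
+cmr "get ml-model huggingface zoo _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1"
+
+# Feed the downloaded ONNX file to MLPerf loadgen with random inputs (performance only, no accuracy)
+cmr "python app loadgen-generic _onnxruntime" --modelpath=<path to the downloaded .onnx file>
+```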
+
+Resources:
+* [CM script to get ML model from Hugging Face zoo](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-ml-model-huggingface-zoo)
+* [CM script to convert Hugging Face model to ONNX](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/convert-ml-model-huggingface-to-onnx)
+* [CM script to build MLPerf loadgen](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-loadgen)
+* [CM script to run Python Loadgen with any ONNX model](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-loadgen-generic-python/README-extra.md)
+* [MLPerf BERT FP32 model is available at Hugging Face](https://huggingface.co/ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1)
+
+Some results showcasing the CK workflow to benchmark Hugging Face models with MLPerf inference v3.0 (BERT):
+* https://access.cknowledge.org/playground/?action=experiments&name=2f1f70d8b2594149
+* https://access.cknowledge.org/playground/?action=experiments&name=mlperf-inference--v3.0--edge--open-power--language-processing--offline&result_uid=9d2594448bbb4b45
+
+Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md)
+to run reference implementations of MLPerf inference benchmarks
+using the CM automation language and use them as a base for your developments.
+
+Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision.
+
+### Prizes
+
+* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.*
+* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).*
+* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. 
+ + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + diff --git a/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json new file mode 100644 index 0000000000..146505b55a --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-hugging-face-models-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "huggingface", + "mlperf-inference", + "mlperf-inference-huggingface", + "mlperf-inference-huggingface", + "mlperf-inference-huggingface-v3.1", + "mlperf-inference-huggingface-v3.1-2023", + "v3.1" + ], + "title": "Implement CM automation to run benchmark Hugging Face models using MLPerf loadgen", + "uid": "72b95d08a9e04698" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md new file mode 100644 index 0000000000..aec0514730 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md @@ -0,0 +1,31 @@ +### Challenge + +Add CM interface to run MLPerf inference benchmarks on Intel-based platforms. + +You can start from reproducing any past MLPerf inference submission from Intel and their partners +and then adding CM automation. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json new file mode 100644 index 0000000000..c3d9adbe4c --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-intel-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20240104", + "date_open": "20230704", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "intel", + "mlperf-inference", + "mlperf-inference-intel", + "mlperf-inference-intel", + "mlperf-inference-intel-v3.1", + "mlperf-inference-intel-v3.1-2023", + "v3.1" + ], + "title": "Add the CM interface to run MLPerf inference benchmarks on Intel-based platforms", + "trophies": true, + "uid": "1c1d5da6766f4afb" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md new file mode 100644 index 0000000000..6aaf4e3947 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md @@ -0,0 +1,34 @@ +### Challenge + +Add support to run a reference implementation of any MLPerf inference benchmark using +[Mojo language]( https://github.com/modularml/mojo ) +from [Modular.ai](https://modular.ai). + +Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 with Mojo. + +Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *The first implementation will receive a cache prize from organizers.* +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge Ltd](https://cKnowledge.org) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json new file mode 100644 index 0000000000..e805879dee --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-modular-mojo-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "experiments": [], + "points": 1, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mojo", + "mlperf-inference", + "mlperf-inference-mojo", + "mlperf-inference-mojo", + "mlperf-inference-mojo-v3.1", + "mlperf-inference-mojo-v3.1-2023", + "v3.1" + ], + "title": "Run reference implementations of MLperf inference benchmarks using Mojo language from Modular.ai", + "trophies": true, + "uid": "0a8a7bb5572447db" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md new file mode 100644 index 0000000000..c16a9335a6 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md @@ -0,0 +1,33 @@ +### Challenge + +Add CM interface to run MLPerf inference benchmarks on Qualcomm AI100-based platforms. + +You can start from reproducing any past submission from Dell, Lenovo or HPE +and then adding CM automation. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json new file mode 100644 index 0000000000..07c626e259 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json @@ -0,0 +1,26 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-qualcomm-ai100-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20240104", + "date_open": "20230704", + "points":3, + "trophies":true, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "ai100", + "mlperf-inference", + "mlperf-inference-ai100", + "mlperf-inference-ai100", + "mlperf-inference-ai100-v3.1", + "mlperf-inference-ai100-v3.1-2023", + "v3.1" + ], + "title": "Add the CM interface to run MLPerf inference benchmarks on Qualcomm AI100-based platforms", + "uid": "09bd5f9e05ff46b1" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md new file mode 100644 index 0000000000..f8d9fbd71b --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md @@ -0,0 +1,41 @@ +### Challenge + +Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 using +CM automation language with Apache TVM, any model and any platform. + +Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + +### Prizes + +* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + +### Organizers + +* [Deelvin](https://deelvin.com) +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge Ltd](https://cKnowledge.org) + +### Status + +This challenge is under preparation. + +* https://github.com/mlcommons/ck/pull/693 +* https://github.com/mlcommons/ck/pull/700 +* https://github.com/mlcommons/ck/pull/701 + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json new file mode 100644 index 0000000000..839fb6b86e --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-tvm-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":1, + "trophies":true, + "experiments": [], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "tvm", + "mlperf-inference", + "mlperf-inference-tvm", + "mlperf-inference-tvm", + "mlperf-inference-tvm-v3.1", + "mlperf-inference-tvm-v3.1-2023", + "v3.1" + ], + "title": "Run and optimize MLPerf inference v3.1 benchmarks with Apache TVM", + "uid": "29c416e245884746" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md new file mode 100644 index 0000000000..0a5fe9aa2c --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md @@ -0,0 +1,31 @@ +### Challenge + +Add more models and hardware backends to the [universal C++ implementation of MLPerf inference benchmarks)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/app-mlperf-inference-cpp) +being developed by the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md). + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json new file mode 100644 index 0000000000..e4e5cae105 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json @@ -0,0 +1,27 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points": 2, + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "cpp", + "mlperf-inference", + "mlperf-inference-cpp", + "mlperf-inference-cpp", + "mlperf-inference-cpp-v3.1", + "mlperf-inference-cpp-v3.1-2023", + "v3.1" + ], + "title": "Add more models and hardware backends to the universal C++ implementation of MLPerf inference benchmarks from MLCommons", + "trophies": true, + "uid": "518420b0e6dd4fed" +} diff --git a/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md new file mode 100644 index 0000000000..d587f62f89 --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md @@ -0,0 +1,36 @@ +### Challenge + +Prepare, optimize and submit any benchmarking results to MLPerf inference v3.1 using +CM automation language on Windows. + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* +* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. + + +### Organizers + +* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + + +### Status + +Open ticket: [GitHub](https://github.com/mlcommons/ck/issues/696) + + +### Results + +All accepted results will be publicly available in the CM format with derived metrics +in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), +in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) +and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json new file mode 100644 index 0000000000..1a55dcbe0f --- /dev/null +++ b/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json @@ -0,0 +1,28 @@ +{ + "alias": "optimize-mlperf-inference-v3.1-windows-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_close_extension": true, + "date_open": "20230704", + "points":2, + "trophies":true, + "experiments": [], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "windows", + "mlperf-inference", + "mlperf-inference-windows", + "mlperf-inference-windows", + "mlperf-inference-windows-v3.1", + "mlperf-inference-windows-v3.1-2023", + "v3.1" + ], + "title": "Run and optimize MLPerf inference v3.1 benchmarks on Windows", + "uid": "53e56d714c7649c7" +} diff --git a/challenge/repro-mlperf-inf-v3.0-orin/README.md b/challenge/repro-mlperf-inf-v3.0-orin/README.md new file mode 100644 index 0000000000..54dd4feeb0 --- /dev/null +++ b/challenge/repro-mlperf-inf-v3.0-orin/README.md @@ -0,0 +1,16 @@ +### Challenge + +Reproduce MLPerf inference v3.0 benchmark results for Nvidia Jetson Orin +(performance, accuracy,power) and automate it using the +[MLCommons CK framework](https://github.com/mlcommons/ck). + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge](https://cKnowledge.org) + +### Status + +Finished. Preliminary results are available [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md). + diff --git a/challenge/repro-mlperf-inf-v3.0-orin/_cm.json b/challenge/repro-mlperf-inf-v3.0-orin/_cm.json new file mode 100644 index 0000000000..aff0fdba0f --- /dev/null +++ b/challenge/repro-mlperf-inf-v3.0-orin/_cm.json @@ -0,0 +1,23 @@ +{ + "alias": "repro-mlperf-inf-v3.0-orin", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230406", + "date_open": "20230301", + "experiments": [ + { + "tags": "mlperf-inference,v3.0" + } + ], + "_password_hash": "$2b$12$ionIRWe5Ft7jkn4y/7C6/eYoo6uBBMkGy/9SxwtKhaDRqZ1w2s3dO", + "tags": [ + "reproduce", + "replicate", + "automate", + "orin", + "nvidia", + "mlperf-inference-v3.0-orin" + ], + "title": "Reproduce MLPerf inference v3.0 results for Nvidia Jetson Orin", + "uid": "6d377c1a1b224636" +} diff --git a/challenge/repro-mlperf-inference-retinanet-scc2022/README.md b/challenge/repro-mlperf-inference-retinanet-scc2022/README.md new file mode 100644 index 0000000000..9917547c15 --- /dev/null +++ b/challenge/repro-mlperf-inference-retinanet-scc2022/README.md @@ -0,0 +1,39 @@ +### Challenge + +Reproduce the MLPerf inference RetinaNet benchmark during Student Cluster Competition at SuperComputing'22 +using the following [CM tutorial](https://github.com/mlcommons/ck/blob/master/docs/tutorials/sc22-scc-mlperf.md). + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [OctoML](https://octoml.ai) + +### Status + +This challenge has been successfully completed. 
+ +### Results + +Results from 10 international student teams are available at: +* [W&B dashboard 1 (during SCC'22)](https://wandb.ai/cmind/cm-mlperf-sc22-scc-retinanet-offline/table?workspace=user-gfursin) +* [W&B dashboard 2 (after SCC'22)](https://wandb.ai/cmind/cm-mlperf-dse-testing/table?workspace=user-gfursin) + + +### Acknowledgments + +We thank +[Hai Ah Nam](https://www.nersc.gov/about/nersc-staff/advanced-technologies-group/hai-ah-nam), +[Steve Leak](https://www.linkedin.com/in/steve-leak), +[Vijay Janappa Reddi](https://scholar.harvard.edu/vijay-janapa-reddi/home), +[Tom Jablin](https://scholar.google.com/citations?user=L_1FmIMAAAAJ&hl=en), +[Ramesh N Chukka](https://www.linkedin.com/in/ramesh-chukka-74b5b21), +[Peter Mattson](https://www.linkedin.com/in/peter-mattson-33b8863/), +[David Kanter](https://www.linkedin.com/in/kanterd), +[Pablo Gonzalez Mesa](https://www.linkedin.com/in/pablo-gonzalez-mesa-952ab2207), +[Thomas Zhu](https://www.linkedin.com/in/hanwen-zhu-483614189), +[Thomas Schmid](https://www.linkedin.com/in/tschmid) +and [Gaurav Verma](https://www.linkedin.com/in/grverma) +for their suggestions and contributions. + + diff --git a/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json b/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json new file mode 100644 index 0000000000..68352f9c3b --- /dev/null +++ b/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json @@ -0,0 +1,20 @@ +{ + "alias": "repro-mlperf-inference-retinanet-scc2022", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20221201", + "date_open": "20221101", + "tags": [ + "modularize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-inference", + "mlperf-inference-scc", + "mlperf-inference-scc-2022" + ], + "title": "Automate MLPerf RetinaNet benchmark at the Student Cluster Competition at SuperComputing'22 using CM", + "uid": "e71fa8b396874e68" +} diff --git a/challenge/repro-mlperf-inference-v4.0-2024/README.md b/challenge/repro-mlperf-inference-v4.0-2024/README.md new file mode 100644 index 0000000000..af23eb1205 --- /dev/null +++ b/challenge/repro-mlperf-inference-v4.0-2024/README.md @@ -0,0 +1,3 @@ +The [MLCommons](https://mlcommons.org), [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org) +are preparing a unified interface to reproduce results from the MLPerf inference benchmark submission v4.0. +Please feel free to join the testing phase using [GitHub issues](https://github.com/mlcommons/ck/issues)! diff --git a/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml b/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml new file mode 100644 index 0000000000..01bcfd52a7 --- /dev/null +++ b/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml @@ -0,0 +1,25 @@ +alias: repro-mlperf-inference-v4.0-2024 +uid: e6b8738383eb46d0 + +automation_alias: challenge +automation_uid: 3d84abd768f34e08 + +title: Reproduce and automate MLPerf inference benchmark results v4.0 from different vendors (Intel, Nvidia, Qualcomm, Google, NeuralMagic, ...) 
using CM

+
+date_open: '20240201'
+
+tags:
+- modularize
+- optimize
+- reproduce
+- replicate
+- automate
+- benchmark
+- mlperf
+- mlperf-inference
+- mlperf-inference-v4.0
+- mlperf-inference-v4.0-2024
+- v4.0
+
+experiments:
+- tags: mlperf-inference,v4.0
diff --git a/challenge/repro-mlperf-inference-v4.1-2024/README.md b/challenge/repro-mlperf-inference-v4.1-2024/README.md
new file mode 100644
index 0000000000..1aacc2d59d
--- /dev/null
+++ b/challenge/repro-mlperf-inference-v4.1-2024/README.md
@@ -0,0 +1,4 @@
+The [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org) and [MLCommons](https://mlcommons.org)
+are preparing an open reproducibility challenge to reproduce various results from the MLPerf inference benchmark v4.1
+using the MLCommons CM automation framework. Please stay tuned for more details!
+
diff --git a/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml b/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml
new file mode 100644
index 0000000000..840d58318d
--- /dev/null
+++ b/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml
@@ -0,0 +1,22 @@
+alias: repro-mlperf-inference-v4.1-2024
+uid: 2093f4d750144df4
+
+automation_alias: challenge
+automation_uid: 3d84abd768f34e08
+
+title: 'Reproduce the upcoming MLPerf inference benchmark v4.1 results'
+
+date_open: '20240901'
+
+tags:
+- modularize
+- optimize
+- reproduce
+- replicate
+- automate
+- benchmark
+- mlperf
+- mlperf-inference
+- mlperf-inference-v4.1
+- mlperf-inference-v4.1-2024
+- v4.1
diff --git a/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md
new file mode 100644
index 0000000000..0f59f59f0e
--- /dev/null
+++ b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md
@@ -0,0 +1,36 @@
+### Challenge
+
+Reproduce and automate [TinyMLPerf benchmarks](https://github.com/mlcommons/tiny).
+
+### Organizers
+
+* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce)
+* [cTuning foundation](https://cTuning.org)
+* [cKnowledge Ltd](https://cKnowledge.org)
+
+### Status
+
+We have successfully reproduced the [TinyMLPerf v1.0 submission with microTVM on the STMicroelectronics NUCLEO-L4R5ZI board](https://github.com/mlcommons/tiny_results_v1.0/tree/main/closed/OctoML),
+automated it with the latest version of the [MLCommons CM automation language](https://github.com/mlcommons/ck/blob/master/docs/README.md),
+submitted the reproduced results to the TinyMLPerf v1.1 round,
+and added all past TinyMLPerf results to the [MLCommons CK playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny)
+for further collaborative analysis and improvement.
+
+Please check our tutorial and reproducibility report:
+* [Automate TinyMLPerf benchmark](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/automate-mlperf-tiny.md) - useful for all SW/HW stacks and submission rounds.
+* [Reproduce TinyMLPerf v1.0 submission](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/reproduce-mlperf-tiny.md).
+
+TinyMLPerf v1.1 results will be published at the [MLCommons CK playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny)
+in mid-June 2023. 
+ +### Related discussions for the future + +* https://github.com/mlcommons/ck/pull/693 +* https://github.com/mlcommons/ck/pull/700 +* https://github.com/mlcommons/ck/pull/701 +* https://github.com/mlcommons/ck/issues/606 + +### Results + +All results will be available in [this GitHub repo](https://github.com/ctuning/cm4mlperf-results) +and can be visualized and compared using the [MLCommons Collective Knowledge Playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny). diff --git a/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json new file mode 100644 index 0000000000..4e9e248505 --- /dev/null +++ b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json @@ -0,0 +1,23 @@ +{ + "alias": "reproduce-and-automate-tinymlperf-v1.1-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230519", + "date_open": "20230501", + "experiments": [], + "tags": [ + "modularize", + "automate", + "reproduce", + "replicate", + "optimize", + "benchmark", + "tinymlperf", + "tinymlperf-inference", + "tinymlperf-inference-v3.0", + "tinymlperf-inference-v3.0-2023", + "v1.0" + ], + "title": "Reproduce and optimize TinyMLPerf inference v1.1 benchmarks", + "uid": "d98cd66e0e5641f7" +} diff --git a/challenge/reproduce-mlperf-training-v3.0-2023/README.md b/challenge/reproduce-mlperf-training-v3.0-2023/README.md new file mode 100644 index 0000000000..a1f1ea22ac --- /dev/null +++ b/challenge/reproduce-mlperf-training-v3.0-2023/README.md @@ -0,0 +1,17 @@ +### Challenge + +Prepare, optimize and reproduce MLPerf training v3.0 benchmarks +using the [MLCommons CM (CK2) automation framework](https://github.com/mlcommons/ck) + +### Status + +We could not do a successful submission mainly because the training scripts were not converging on a single GPU. We tried resnet and bert training. The below CM scripts are added to do MLPerf training for BERT using the reference and NVIDIA implementations. + +1. [BERT Training using Nvidia code](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-training-nvidia) +2. 
[BERT Training using MLPerf Reference code](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-training-reference) + +### Organizers + +* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning foundation](https://cTuning.org) +* [cKnowledge](https://cKnowledge.org) diff --git a/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json b/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json new file mode 100644 index 0000000000..d1e5eddea8 --- /dev/null +++ b/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json @@ -0,0 +1,23 @@ +{ + "alias": "reproduce-mlperf-training-v3.0-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230519", + "date_open": "20230501", + "experiments": [], + "tags": [ + "modularize", + "optimize", + "reproduce", + "replicate", + "automate", + "benchmark", + "mlperf", + "mlperf-training", + "mlperf-training-v3.0", + "mlperf-training-v3.0-2023", + "v3.0" + ], + "title": "Reproduce MLPerf training v3.0 benchmarks", + "uid": "1d26149c1cce4da3" +} diff --git a/challenge/run-mlperf@home-v3.1-cpu/README.md b/challenge/run-mlperf@home-v3.1-cpu/README.md new file mode 100644 index 0000000000..bd734f7896 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/README.md @@ -0,0 +1,67 @@ +### Introduction + +The goal of this MLPerf@home challenge is to help the community find +the most efficient CPU (Intel/AMD/Arm) for BERT-99 model with DeepSparse engine +and different variations of MobileNets/EfficientNets with TFLite +in terms of latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics. + +We would like to ask you to run a few [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) +with BERT and MobileNets/EfficientNets on one or more systems with different CPUs +that you have an access to: laptops, servers, cloud instances... + +You will be able to run benchmarks, collect all metrics and submit results in an automated way +in a native environment or Docker container using the portable and technology-agnostic +[MLCommons Collective Mind automation language (CM)](https://doi.org/10.5281/zenodo.8105339). + +Your name and benchmark submissions will be published in the official MLCommons inference v3.1 results +on September 1, 2023 (submission deadline: August 4, 2023), +will be published in the [official leaderboard](https://access.cknowledge.org/playground/?action=contributors), +will be included to the prize draw, and will be presented in our upcoming ACM/HiPEAC events. + +Please report encountered problems using [GitHub issues](https://github.com/mlcommons/ck) +to help the community improve CM automation workflows to run MLPerf benchmarks on any system with any software/hardware stack. + +Thank you in advance for helping the community find Pareto-efficient AI/ML Systems! 
+ +### Minimal requirements + +* CPU: Any x86-64 or Arm64 +* OS: + * native: any Linux (tested on Ubuntu 22.04) + * Docker: any OS +* Disk space: + * BERT-99: ~ 20GB + * Different variations of MobileNets/EfficientNets: ~ 140GB +* Time to run: + * BERT-99: ~ 2 hours + * Different variations of MobileNets/EfficientNets: ~ 2 days + +### Instructions to run benchmarks and submit results + +You can run any of these benchmarks or all depending on available time: + +* [Automated Design Space Exploration of MobileNets/EfficientNets; TFLite MLPerf implementation; native environment or Docker](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md) +* [BERT-99 model; DeepSparse MLPerf implementation; native environment](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-cpu/run-cpu-bert-99-deepsparse.md) + +### Results + +All accepted results with submitter names will be publicly available +at the official [MLCommons website](https://mlcommons.org) +and in the [Collective Knowledge explorer (MLCommons CK)](https://access.cknowledge.org/playground/?action=experiments) +along with the reproducibility and automation report to help the community +build efficient AI/ML systems. + + +### Organizers + +* [MLCommons Task Force on Automation and Reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Advanced challenges + +If you feel that running these benchmarks was relatively easy, +please try [more advanced challenges](https://access.cknowledge.org/playground/?action=challenges), +read about our [plans and long-term vision](https://doi.org/10.5281/zenodo.8105339), +check [CM documentation](https://github.com/mlcommons/ck/blob/master/docs/README.md) +and run other [MLPerf benchmarks](https://github.com/mlcommons/ck/tree/master/docs/mlperf). diff --git a/challenge/run-mlperf@home-v3.1-cpu/_cm.json b/challenge/run-mlperf@home-v3.1-cpu/_cm.json new file mode 100644 index 0000000000..88f4716cda --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/_cm.json @@ -0,0 +1,21 @@ +{ + "alias": "run-mlperf@home-v3.1-cpu", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_open": "20230725", + "experiments": [], + "points": 2, + "sort": -20, + "tags": [ + "run", + "mlperf", + "inference", + "v3.1", + "mlperf-inference-v3.1-simple-cpu" + ], + "title": "Work with the community to find the most efficient CPUs (Intel/AMD/Arm) for BERT and MobileNets/EfficientNets (latency, throughput, accuracy, number of cores, frequency, memory size, cost and other metrics)", + "skip": true, + "trophies": true, + "uid": "498f33f3dac647c1" +} diff --git a/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md new file mode 100644 index 0000000000..b4266ffa97 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md @@ -0,0 +1,100 @@ +# Introduction + +This guide will help you automatically run the MLPerf inference benchmark v3.1 with BERT-99 model and DeepSparse engine +on any Linux-based system with Intel, AMD or Arm CPU. + +This benchmark is automated by the MLCommons CM language and you should be able to submit official MLPerf v3.1 inference results +for offline scenario in open division and edge category. 
+
+It will require ~20GB of disk space and can take ~2 hours to run on one system.
+
+
+
+
+## Install CM automation language
+
+Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md).
+It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget.
+
+If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues).
+
+
+## Install repository with CM automations
+
+Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM.
+These scripts are being developed and shared by the community and MLCommons under the Apache 2.0 license
+to enable portable, modular, and technology-agnostic benchmarks and applications
+that can automatically run with any software, hardware, models and data sets.
+
+```bash
+cm pull repo mlcommons@ck
+```
+
+You can run it again at any time to pick up the latest updates.
+
+Note that CM will store all such repositories and downloaded/installed data sets, models and tools
+in your `$HOME/CM` directory.
+
+Since MLPerf benchmarks require lots of space (sometimes hundreds of gigabytes),
+you can change the above location to some large scratch disk using the `CM_REPOS`
+environment variable as follows:
+
+```bash
+export CM_REPOS={new path to CM repositories and data}
+echo "CM_REPOS=${CM_REPOS}" >> $HOME/.bashrc
+cm pull repo mlcommons@ck
+```
+
+
+
+## Setup virtual environment
+
+We suggest setting up a Python virtual environment via CM to avoid contaminating your existing Python installation:
+
+```bash
+cm run script "install python-venv" --name=mlperf --version_min=3.8
+export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf"
+```
+
+CM will install a new Python virtual environment in the CM cache and will install all Python dependencies there:
+```bash
+cm show cache --tags=python-venv
+```
+
+Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache.
+
+You can clean it at any time and start from scratch using the following command:
+```bash
+cm rm cache -f
+```
+
+Alternatively, you can remove specific entries using tags:
+```bash
+cm show cache
+cm rm cache --tags=tag1,tag2,...
+```
+
+
+
+### Do a test run to detect and record the system performance
+
+```bash
+cm run script --tags=generate-run-cmds,inference,_find-performance \
+--model=bert-99 --implementation=reference --device=cpu --backend=deepsparse \
+--category=edge --division=open --quiet --scenario=Offline
+```
+
+### Do a full accuracy and performance run
+
+```bash
+cm run script --tags=generate-run-cmds,inference,_submission --model=bert-99 \
+--device=cpu --implementation=reference --backend=deepsparse \
+--execution-mode=valid --results_dir=$HOME/results_dir \
+--category=edge --division=open --quiet --scenario=Offline
+```
+
+### Generate and upload MLPerf submission
+
+Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/Submission.md) to generate the submission tree and upload your results.
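+
+For reference, the submission-generation command is along the lines of the one used for the other benchmarks in this repository (the results and submission directories below are assumptions - adjust them to the paths used in your runs):
+
+```bash
+cm run script --tags=generate,inference,submission --results_dir=$HOME/results_dir/valid_results \
+--submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=<your name or organization> \
+--adr.inference-src.version=master --quiet
+```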
+ + diff --git a/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md new file mode 100644 index 0000000000..f41b1b463b --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md @@ -0,0 +1,77 @@ +# Introduction + +This guide will help you automatically run the MLPerf inference benchmark v3.1 with multiple variations of MobileNets and EfficientNets +and TFLite on any Linux-based system with Intel, AMD or Arm CPU. + +This benchmark is automated by the MLCommons CM language and you should be able to submit official MLPerf v3.1 inference results +for singlestream scenario in open division and edge category. + +It will require ~140GB of disk space and can take ~2 days to run on 1 system producing 243 MLPerf results +during automatic design space exploration to trade off accuracy vs performance. + + + +## Install CM automation language + +Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md). +It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. + +If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). + + +## Install repository with CM automations + +Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. +These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license +to enable portable, modular, and technology-agnostic benchmarks and applications +that can automatically run with any software, hardware, models and data sets. + +```bash +cm pull repo mlcommons@ck +``` + +You can run it again at any time to pick up the latest updates. + +Note that CM will store all such repositories and downloaded/installed data sets, models and tools +in your `$HOME/CM` directory. + +Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), +you can change the above location to some large scratch disk using `CM_REPOS` +environment variable as follows: + +```bash +export CM_REPOS={new path to CM repositories and data} +echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" +cm pull repo mlcommons@ck +``` + + + +## Setup virtual environment + +We suggest you to setup a Python virtual environment via CM to avoid contaminating your existing Python installation: + +```bash +cm run script "install python-venv" --name=mlperf --version_min=3.8 +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf" +``` + +CM will install a new Python virtual environment in CM cache and will install all Python dependencies there: +```bash +cm show cache --tags=python-venv +``` + +Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache. + +You can clean it at any time and start from scratch using the following command: +```bash +cm rm cache -f +``` + +Alternatively, you can remove specific entries using tags: +```bash +cm show cache +cm rm cache --tags=tag1,tag2,... 
+``` + + diff --git a/challenge/run-mlperf@home-v3.1-gpu/README.md b/challenge/run-mlperf@home-v3.1-gpu/README.md new file mode 100644 index 0000000000..b6482d3835 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/README.md @@ -0,0 +1,65 @@ +### Introduction + +The goal of this MLPerf@home challenge is to help the community find +the most efficient Nvidia GPUs for GPT-J 6B model and BERT-99 in terms of +latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics. + +We would like to ask you to run a few [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) +with GPT-J and BERT-99 models on one or more systems with different Nvidia GPUs +that you have an access to: laptops, servers, cloud instances... + +You will be able to run benchmarks, collect all metrics and submit results in an automated way +in a native environment or Docker container using the portable and technology-agnostic +[MLCommons Collective Mind automation language (CM)](https://doi.org/10.5281/zenodo.8105339). + +Your name and benchmark submissions will be published in the official MLCommons inference v3.1 results +on September 1, 2023 (**submission deadline: August 17, 2023**), +will be published in the [official leaderboard](https://access.cknowledge.org/playground/?action=contributors), +will be included to the prize draw, and will be presented in our upcoming ACM/HiPEAC events. + +Please report encountered problems using [GitHub issues](https://github.com/mlcommons/ck) +to help the community improve CM automation workflows to run MLPerf benchmarks on any system with any software/hardware stack. + +Thank you in advance for helping the community find Pareto-efficient AI/ML Systems! + +### Minimal requirements + +* GPU: Nvidia +* GPU memory: + * GPT-J 6B: min 24GB + * BERT-99: min 8..16GB +* OS: + * native: any Linux (tested on Ubuntu 22.04) + * Docker: any OS + any Linux (tested on Ubuntu 22.04) +* Disk space: ~30GB per model/data set +* Time to run: + * GPT-J 6B: ~ 1 day + * BERT-99: ~ 2 hours + +### Instructions to run benchmarks and submit results + +* [GPT-J 6B model (24GB min GPU memory); PyTorch+CUDA; native environment](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md) +* [BERT-99 model (8GB min GPU memory); TensorRT; Docker](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md) + +### Results + +All accepted results with submitter names will be publicly available +at the official [MLCommons website](https://mlcommons.org) +and in the [Collective Knowledge explorer (MLCommons CK)](https://access.cknowledge.org/playground/?action=experiments) +along with the reproducibility and automation report to help the community +build efficient AI/ML systems. 
+ +### Organizers + +* [MLCommons Task Force on Automation and Reproducibility](https://cKnowledge.org/mlcommons-taskforce) +* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) + +### Advanced challenges + +If you feel that running these benchmarks was relatively easy, +please try [more advanced challenges](https://access.cknowledge.org/playground/?action=challenges), +read about our [plans and long-term vision](https://doi.org/10.5281/zenodo.8105339), +check [CM documentation](https://github.com/mlcommons/ck/blob/master/docs/README.md) +and run other [MLPerf benchmarks](https://github.com/mlcommons/ck/tree/master/docs/mlperf). diff --git a/challenge/run-mlperf@home-v3.1-gpu/_cm.json b/challenge/run-mlperf@home-v3.1-gpu/_cm.json new file mode 100644 index 0000000000..af7deeadae --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/_cm.json @@ -0,0 +1,20 @@ +{ + "alias": "run-mlperf@home-v3.1-gpu", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close": "20230817", + "date_open": "20230725", + "experiments": [], + "points": 2, + "sort": -30, + "tags": [ + "run", + "mlperf", + "inference", + "v3.1", + "mlperf-inference-v3.1-simple-cpu" + ], + "title": "Work with the community to find the most efficient Nvidia GPUs for GPT-J 6B model and BERT (latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics)", + "trophies": true, + "uid": "54230c3b66564cef" +} diff --git a/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md new file mode 100644 index 0000000000..f543c23621 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md @@ -0,0 +1,193 @@ +# Introduction + +This guide will help you run the Nvidia implementation of the MLPerf inference benchmark v3.1 +with BERT-99 model and TensorRT on any Linux-based system with Nvidia GPU (8..16GB min memory required) +and Docker. + +This benchmark is semi-automated by the [MLCommons CM language](https://doi.org/10.5281/zenodo.8105339) +and you should be able to submit official MLPerf v3.1 inference results +for all scenarios in closed division and edge category +(**deadline to send us results for v3.1 submission: August 3, 2023**). + + +It will require ~30GB of disk space and can take ~2 hours to run on 1 system. + + +## Install CM automation language + +Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md). +It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. + +If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). + + +## Install repository with CM automations + +Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. +These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license +to enable portable, modular, and technology-agnostic benchmarks and applications +that can automatically run with any software, hardware, models and data sets. + +```bash +cm pull repo mlcommons@ck +``` + +You can run it again at any time to pick up the latest updates. 
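+
+If you are scripting your setup, the same pull can also be issued from Python through the `cmind` API. This is only a sketch of the assumed CLI-to-API mapping (`cm pull repo mlcommons@ck` → action `pull`, automation `repo`, artifact `mlcommons@ck`):
+
+```python
+# Minimal sketch (assumed mapping): pull or update the mlcommons@ck repository programmatically.
+import cmind as cm
+
+r = cm.access({'action': 'pull',
+               'automation': 'repo',
+               'artifact': 'mlcommons@ck'})
+
+if r['return'] > 0:
+    raise Exception(r.get('error', 'failed to pull mlcommons@ck'))
+```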
+ +Note that CM will store all such repositories and downloaded/installed data sets, models and tools +in your `$HOME/CM` directory. + +Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), +you can change the above location to some large scratch disk using `CM_REPOS` +environment variable as follows: + +```bash +export CM_REPOS={new path to CM repositories and data} +echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" +cm pull repo mlcommons@ck +``` + + + +## Setup CUDA and Docker container + +### Download CUDA 11.8 + +Nvidia recommends the following version of CUDA to be used with their MLPerf inference implementation: + +``` +wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run +``` + +However, you are very welcome to try another version! + +### Download cuDNN, TensorRT + +For x86 machines, please download the following TAR files: +1. [cuDNN](https://developer.nvidia.com/cudnn) - note that Nvidia recommends `cudnn-linux-x86_64-8.9.2.26_cuda11-archive.tar.xz` + but you are welcome to try another version +2. [TensorRT](https://developer.nvidia.com/tensorrt) - note that Nvidia recommends `TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz` + but you can try another version + + +### Set up Nvidia Docker container with MLPerf benchmarks + +1. [Install Docker](https://docs.docker.com/engine/install/) and [Nvidia container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) + +2. Give Docker permission to the current user + ``` + sudo usermod -aG docker $USER + ``` + Logout and login + Restart docker if required and confirm that Nvidia container toolkit is working by + ``` + nvidia-ctk --version + ``` + +3. Check if Nvidia driver is working properly on the host. + ``` + nvidia-smi + ``` + If the above command produces any error you'll need to install Nvidia drivers on the host. You can do this via CM if you have sudo access + ``` + cmr "install cuda prebuilt _driver" --version=11.8.0 + ``` + + +4. Build the docker container and mount the paths from the host machine. + + *You may need to change --cuda_run_file_path, --tensorrt_tar_file_path and --cudnn_tar_file_path if you downloaded other versions than recommended by Nvidia.* + + *You may want to change the `scratch_path` location as it can take 100s of GBs.* + + ```bash + cm docker script --tags=build,nvidia,inference,server \ + --cuda_run_file_path=$HOME/cuda_11.8.0_520.61.05_linux.run \ + --tensorrt_tar_file_path=$HOME/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + --cudnn_tar_file_path=$HOME/cudnn-linux-x86_64-8.9.2.26_cuda11-archive.tar.xz \ + --scratch_path=$HOME/mlperf_scratch \ + --docker_cm_repo=mlcommons@ck \ + --results_dir=$HOME/results_dir \ + --submission_dir=$HOME/submission_dir \ + --adr.compiler.tags=gcc + ``` + +5. At the end of the build you'll get a prompt - please enter your system name such as "aws_nvidia_t4" + (note that space, `-` and other special characters are not allowed), + and say `yes` to generating the configuration files. + + ``` + ============================================ + => A system ID is a string containing only letters, numbers, and underscores + => that is used as the human-readable name of the system. It is also used as + => the system name when creating the measurements/ and results/ entries. + => This string should also start with a letter to be a valid Python enum member name. 
+ => Specify the system ID to use for the current system: phoenix + => Reloaded system list. MATCHED_SYSTEM: KnownSystem.phoenix + => This script will generate Benchmark Configuration stubs for the detected system. + Continue? [y/n]: y + ``` + Now you'll be inside the CM Nvidia docker container and can access Nvidia implementations of MLPerf inference benchmarks. + +6. Once the build is complete, you can run Nvidia implementations of MLPerf inference benchmarks + using the unified CM interface. + + You can also save the container at this stage using [Docker commit](https://docs.docker.com/engine/reference/commandline/commit/) + so that it can be launched later without having to go through the previous steps. + + +### Do a test run to detect and record the system performance + +``` +cmr "generate-run-cmds inference _find-performance _all-scenarios" \ + --model=bert-99 \ + --implementation=nvidia-original \ + --device=cuda \ + --backend=tensorrt \ + --category=edge \ + --division=closed \ + --test_query_count=1000 \ + --quiet +``` + +### Do full accuracy and performance runs + +``` +cmr "generate-run-cmds inference _submission _allscenarios" \ + --model=bert-99 \ + --device=cuda \ + --implementation=nvidia-original \ + --backend=tensorrt \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --category=edge \ + --division=closed \ + --quiet +``` + +* `--offline_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers + +### Populate the README files describing your submission + +``` +cmr "generate-run-cmds inference _populate-readme _all-scenarios" \ + --model=bert-99 \ + --device=cuda \ + --implementation=nvidia-original \ + --backend=tensorrt \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --category=edge \ + --division=closed \ + --quiet +``` + +### Generate and upload MLPerf submission + +Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/Submission.md) to generate the submission tree and upload your results. + + +## Questions? Suggestions? + +Please follow the [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org) +and [MLCommons](https://mlcommons.org). diff --git a/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md new file mode 100644 index 0000000000..39b1cc0de2 --- /dev/null +++ b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md @@ -0,0 +1,314 @@ +# Introduction + +This guide will help you run the reference implementation of the MLPerf inference benchmark v3.1 +with GPT-J 6B model and PyTorch on any Linux-based system with Nvidia GPU (24GB min memory required) +using the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339). + +CM will help you to obtain performance and accuracy numbers for GPT-J 6B model on your system +for the SingleStream scenario and submit them to the official MLPerf v3.1 inference benchmarking round +in open division and edge category +(**deadline to send us results for v3.1 submission: August 3, 2023**). + +You can read more about scenarios, divisions and categories of MLPerf inference benchmarks +in this [MLPerf inference benchmark paper](https://arxiv.org/abs/1911.02549) - +our goal is to help the community compare performance, accuracy and other metrics of popular models across diverse systems +in an automated, unified and reproducible way! 
+ +This benchmark will require ~30GB of disk space and can take ~1 day to run on one system +to have a valid MLPerf result. + + + +## Install CM automation language + +Install the [MLCommons CM automation language](https://github.com/mlcommons/ck) as described in this [guide](../../../docs/installation.md). +It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. + +If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). + + +## Install repository with CM automations + +Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. +These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license +to enable portable, modular, and technology-agnostic benchmarks and applications +that can automatically run with any software, hardware, models and data sets. + +```bash +cm pull repo mlcommons@ck +``` + +You can run it again at any time to pick up the latest updates. + +Note that CM will store all such repositories and downloaded/installed data sets, models, and tools +in your `$HOME/CM` directory. + +Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), +you can change the above location to some large scratch disk using `CM_REPOS` +environment variable as follows: + +```bash +export CM_REPOS={new path to CM repositories and data} +echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" +cm pull repo mlcommons@ck +``` + + + +## Setup virtual environment + +We suggest you to setup a Python virtual environment via CM to avoid contaminating your existing Python installation: + +```bash +cm run script "install python-venv" --name=mlperf --version_min=3.8 +export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf" +``` + +CM will install a new Python virtual environment in CM cache and will install all Python dependencies there: +```bash +cm show cache --tags=python-venv +``` + +Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache. + +You can clean it at any time and start from scratch using the following command: +```bash +cm rm cache -f +``` + +Alternatively, you can remove specific entries using tags: +```bash +cm show cache +cm rm cache --tags=tag1,tag2,... +``` + + +## Do the performance run + +Now you can run MLPerf inference benchmark to measure performance of GPT-J using CM command as follows +(note that `cmr` is equivalent to `cm run script`): + +```bash +cm run script --tags=generate-run-cmds,inference,_performance-only \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + +Note that this command will need to automatically download the model (24GB) +and [CNN Daily Mail dataset (relatively small)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-dataset-cnndm)! + +The benchmark run is expected to finish within 10-100 minutes depending on the performance of your GPU. 
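+
+If you want to check what CM has downloaded and cached for this run (for example, the GPT-J checkpoint and the CNN/DailyMail dataset), you can query the CM cache, either with `cm show cache --tags=...` as above or from Python. The tags below are assumptions based on the `get-ml-model-gptj` and `get-dataset-cnndm` CM scripts; adjust them if your cache entries are tagged differently:
+
+```python
+# Minimal sketch (tags are assumed): list CM cache entries for the GPT-J model
+# and the CNN/DailyMail dataset downloaded by the benchmark.
+import cmind as cm
+
+for tags in ('get,ml-model,gptj', 'get,dataset,cnndm'):
+    r = cm.access({'action': 'search', 'automation': 'cache', 'tags': tags})
+    if r['return'] > 0:
+        raise Exception(r['error'])
+    print(tags, '->', len(r['list']), 'cache entries')
+```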
+ +In the end of the valid run, you should see [output](https://github.com/ctuning/mlperf_inference_submissions_v3.1/blob/main/open/cTuning/results/amd_zen4_workstation-reference-gpu-pytorch-v2.0.1-default_config/gptj-99/singlestream/performance/run_1/mlperf_log_summary.txt) similar to + +```txt +================================================ +MLPerf Results Summary +================================================ +SUT name : PySUT +Scenario : SingleStream +Mode : PerformanceOnly +90th percentile latency (ns) : 4751920830 +Result is : VALID + Min duration satisfied : Yes + Min queries satisfied : Yes + Early stopping satisfied: Yes +Early Stopping Result: + * Processed at least 64 queries (201). + * Would discard 9 highest latency queries. + * Early stopping 90th percentile estimate: 5387449249 + * Not enough queries processed for 99th percentile + early stopping estimate (would need to process at + least 662 total queries). + +================================================ +Additional Stats +================================================ +QPS w/ loadgen overhead : 0.33 +QPS w/o loadgen overhead : 0.33 + +Min latency (ns) : 881803157 +Max latency (ns) : 5939081711 +Mean latency (ns) : 3008773902 +50.00 percentile latency (ns) : 2788885477 +90.00 percentile latency (ns) : 4751920830 +95.00 percentile latency (ns) : 5307244203 +97.00 percentile latency (ns) : 5677375096 +99.00 percentile latency (ns) : 5927209480 +99.90 percentile latency (ns) : 5939081711 + +================================================ +Test Parameters Used +================================================ +samples_per_query : 1 +target_qps : 2000 +target_latency (ns): 0 +max_async_queries : 1 +min_duration (ms): 600000 +max_duration (ms): 620000 +min_query_count : 100 +max_query_count : 0 +qsl_rng_seed : 148687905518835231 +sample_index_rng_seed : 520418551913322573 +schedule_rng_seed : 811580660758947900 +accuracy_log_rng_seed : 0 +accuracy_log_probability : 0 +accuracy_log_sampling_target : 0 +print_timestamps : 0 +performance_issue_unique : 0 +performance_issue_same : 0 +performance_issue_same_index : 0 +performance_sample_count : 13368 + +No warnings encountered during test. + +No errors encountered during test. +``` + + +## Do the accuracy run + +```bash +cm run script --tags=generate-run-cmds,inference,_accuracy-only \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + +This accuracy run can take many hours (typically 12..46 hours). You can estimate it using the QPS (queries per second) +from the previous performance run as follows: + +accuracy time = data set / QPS = 13368 / QPS . + +For example, if your reported QPS is 0.1 (equivalent to 10000 ms latency), it will take 13368/0.1 ~ 37 hours. 
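+
+If you would rather not do this arithmetic by hand, the estimate can be derived directly from the `mlperf_log_summary.txt` produced by the performance run (its format is shown above). The path below is just an example and depends on your `results_dir` layout:
+
+```python
+# Minimal sketch: estimate the accuracy-run duration from the measured performance QPS.
+import re
+
+DATASET_SIZE = 13368  # number of samples, reported as performance_sample_count above
+
+summary_path = 'mlperf_log_summary.txt'  # example path; adjust to your results_dir layout
+with open(summary_path) as f:
+    summary = f.read()
+
+m = re.search(r'QPS w/o loadgen overhead\s*:\s*([0-9.]+)', summary)
+if m is None:
+    raise Exception('could not find the QPS line in ' + summary_path)
+
+qps = float(m.group(1))
+hours = DATASET_SIZE / qps / 3600
+print(f'Measured QPS: {qps} -> estimated accuracy run: ~{hours:.1f} hours')
+```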
+ + + +## Populate the MLPerf README files describing your submission + +Now you can use CM to automatically populate README files mandated by MLPerf to describe your submission +(we also show you a simpler syntax of `cmr` instead of `cm run script --tags=`): + +```bash +cmr "generate-run-cmds inference _populate-readme" \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + + +## Generate MLPerf submission + +Unless your organization is an official member of MLCommons, you will be able to participate in the official MLPerf inference community submission +via the cTuning foundation (founding member of MLCommons). + +You should update the following flags in the below CM command: +* Use `--hw_notes_extra` option to add your name to the submission such as `--hw_notes_extra="Result taken by NAME" `. +* Use `--hw_name="My system name"` to give a meaningful system name describing your GPU. + Examples can be seen [here](https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning/systems). +* Use `--submitter=` if your organization is an official MLCommons member and you would like to submit under your organization. + +You should use the master branch of MLCommons inference repo for the submission checker: + +```bash +cmr "generate inference submission" \ + --clean \ + --submitter=cTuning \ + --results_dir=$HOME/results_dir/valid_results \ + --submission_dir=$HOME/inference_submission_tree \ + --preprocess_submission=yes \ + --adr.compiler.tags=gcc \ + --adr.inference-src.version=master \ + --run-checker +``` + +## Push the results to GitHub repo + +1. Create a fork of [this cTuning repo with the community results](https://github.com/ctuning/mlperf_inference_submissions_v3.1). + +2. Run the following command after replacing `--repo_url` with your fork URL. + + ``` + cmr "push github mlperf inference submission" \ + --submission_dir=$HOME/inference_submission_tree \ + --repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.1/ \ + --commit_message="GPTJ results on added by " + ``` + +3. Create a PR to the [cTuning repo with the community results](https://github.com/ctuning/mlperf_inference_submissions_v3.1) + + + + + + + + + +## Additional performance optimization challenge for interested enthusiasts + +The MLPerf GPT-J inference benchmark is implemented in this [backend.py](https://github.com/mlcommons/inference/blob/master/language/gpt-j/backend.py). + +It is automatically installed and cached by CM. You can find it on your system using this command: +```bash +cd `cm find cache --tags=inference,src,_branch.master`/language/gpt-j +ls backend.py +``` + +The original model is available at the [Hugging Face Zoo](https://huggingface.co/EleutherAI/gpt-j-6b). It was fine-tuned by Intel for this benchmark +and is available at the MLCommons cloud. It is automatically downloaded by CM using [this script](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-ml-model-gptj/_cm.json). + +You can try to improve the performance (QPS) on this code or fine-tune model and substitute the default one +in [this line](https://github.com/mlcommons/inference/blob/master/language/gpt-j/backend.py#L27). 
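+
+The cached location of `backend.py` can also be found programmatically. This is a sketch of the assumed API equivalent of the `cm find cache` command shown above, and it assumes that each returned cache entry exposes its directory via a `path` attribute:
+
+```python
+# Minimal sketch (assumed API mapping): locate the cached MLPerf inference sources,
+# equivalent to `cm find cache --tags=inference,src,_branch.master`.
+import os
+import cmind as cm
+
+r = cm.access({'action': 'search',
+               'automation': 'cache',
+               'tags': 'inference,src,_branch.master'})
+if r['return'] > 0:
+    raise Exception(r['error'])
+
+for entry in r['list']:
+    # each cache entry is assumed to expose its directory via `path`
+    print(os.path.join(entry.path, 'language', 'gpt-j', 'backend.py'))
+```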
+ +Some examples of fine-tuning can be seen [here](https://betterprogramming.pub/fine-tuning-gpt-j-6b-on-google-colab-or-equivalent-desktop-or-server-gpu-b6dc849cb205). + +Any better performance or accuracy result will be very valuable to the community. + +After any modification, you can redo a quick performance run to see the performance difference. +``` +cm run script --tags=generate-run-cmds,inference,_performance-only \ + --division=open \ + --category=edge \ + --model=gptj-99 \ + --precision=bfloat16 \ + --device=cuda \ + --implementation=reference \ + --backend=pytorch \ + --scenario=SingleStream \ + --env.GPTJ_BEAM_SIZE=1 \ + --execution-mode=valid \ + --results_dir=$HOME/results_dir \ + --quiet +``` + + + diff --git a/challenge/train-llm-for-cm-mlperf-2023/README.md b/challenge/train-llm-for-cm-mlperf-2023/README.md new file mode 100644 index 0000000000..4e9f6cf178 --- /dev/null +++ b/challenge/train-llm-for-cm-mlperf-2023/README.md @@ -0,0 +1,20 @@ +### Challenge + +Improve the prototype of our LLM-based assistant to suggest users how to run MLPerf inference benchmarks +using the MLCommons CM automation language: https://access.cknowledge.org/assistant . + +Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) +to run reference implementations of MLPerf inference benchmarks +using the CM automation language and use them as a base for your developments. + +Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. + + +### Prizes + +* *Get in touch with organizers for more info!* + + +### Organizers + +* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) diff --git a/challenge/train-llm-for-cm-mlperf-2023/_cm.json b/challenge/train-llm-for-cm-mlperf-2023/_cm.json new file mode 100644 index 0000000000..ce6009db37 --- /dev/null +++ b/challenge/train-llm-for-cm-mlperf-2023/_cm.json @@ -0,0 +1,21 @@ +{ + "alias": "train-llm-for-cm-mlperf-2023", + "automation_alias": "challenge", + "automation_uid": "3d84abd768f34e08", + "date_close_extension": true, + "date_open": "20230704", + "experiments": [], + "points": 3, + "tags": [ + "train", + "improve", + "llm", + "assistant", + "mlperf-llm", + "mlperf-llm-assistant", + "mlperf-assistant" + ], + "title": "Train and improve LLM to suggest users how to run MLPerf inference benchmarks using CM automation language", + "trophies": true, + "uid": "d37bf37a24c44ec3" +} diff --git a/report/mlperf-inference-v3.1-analysis-ctuning/README.md b/report/mlperf-inference-v3.1-analysis-ctuning/README.md new file mode 100644 index 0000000000..9d4b696949 --- /dev/null +++ b/report/mlperf-inference-v3.1-analysis-ctuning/README.md @@ -0,0 +1,93 @@ +On this page, we highlight some of the exciting submissions done by CTuning for the MLCommons Inference 3.1 round. + +## Top Results in Edge Category + +In the edge category, Rigel Supercomputers from One Stop Systems achieved the peak offline performance for the four submitted benchmarks - Image classification (ResNet50), Object detection (RetinaNet), Language processing (Bert) and Speech Recognition (RNNT). The below graph compares the peak performance of bert-99 model among the top 10 performing systems. + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/9f8e3367-1ca4-4298-8545-285cdedfc991) + + +Nvidia RTX 4090 has the best performance for performance per accelerator, and this accelerator is assembled on a PC made by PCSPECIALIST UK. 
The below graph compares the performance per accelerator of the bert-99 model among the top 10 performing systems.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c02120cb-eda9-4eef-9e22-56fff4bf23a7)
+
+Nvidia RTX 4090 also achieves the best latency for ResNet50, BERT and 3d-unet in the SingleStream scenario.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/6d4b39a0-9f39-474a-ac16-5498e281ebad)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/8afb5609-581d-4ee8-be56-731af731f10f)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/5cb88f53-9255-4a0b-98df-a192ba87b125)
+
+
+## Best energy-efficient results in the Edge category
+
+For the speech recognition model RNNT, CTuning submitted the most power-efficient result on the Nvidia Jetson Orin AGX.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d485aa50-a0d4-4a40-a805-cc2ddc3e0ca6)
+
+For the medical imaging model 3d-unet, where the samples per second are quite low, the four best energy-efficient results are from CTuning.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d15297fb-3eff-47c9-b188-68d438b7f248)
+
+For the language processing model bert-99, the "gloria highend" system from Qualcomm tops the energy-efficiency metric, and CTuning's Nvidia Jetson Orin AGX comes second.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/22c85404-51f5-44b7-b128-8df4579c635c)
+
+
+## Benchmarking Rigel Supercomputer
+
+The Rigel Edge Supercomputer from One Stop Systems achieves the peak performance for all four submitted models and comfortably beats the second-place system. It also achieves the best latency in the ResNet50 MultiStream scenario.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/635f5f29-080f-4c7c-85a5-65fcf438f9e1)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c993c2f5-a8b7-4a11-b89f-35d96e357e42)
+
+
+## Benchmarking MLPerf Inference Reference Implementations
+
+We compared the performance of the reference implementation with that of the Nvidia-optimized implementation by running both on an Nvidia RTX 4090 GPU. The reference implementation uses fp32 models, whereas the Nvidia implementation uses quantized models.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/b46bc509-f242-4bc6-a9e8-ec318d09616b)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/404b54d2-a04e-4e5e-861d-43c7d940faf8)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/f5a04e85-269f-485a-8839-348dddcd5eb7)
+
+## Showcasing Apple Metal Performance
+
+We benchmarked the performance of Apple Metal using tensorflow-metal. The graphs below show the performance benefit of running inference on Apple Metal via tensorflow-metal versus ONNX Runtime running only on CPUs.
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/87385e24-b3b5-4694-8106-2c30eeb393de)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c9a38dc9-0986-461e-b81d-988297e1771e)
+
+![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/4b8565b4-7a23-4f29-b450-6eaf00d10f63)
+
+
+## Design Space Exploration For the NeuralMagic DeepSparse Library
+
+Using the CM experiment automation, we performed a design space exploration to find the optimal batch size for the bert-99-compatible sparse models.
+ +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/a18088f2-c864-4c16-b714-5b375cf5fc94) + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/8bd95c5f-344f-4d9f-9f94-c3024efbce13) + + +## Comparing the performance of Modular MLPerf Inference C++ implementations + +Here we compare the performance of MIL Library used by CTuning and the KILT library used by KRAI both on CPUs and GPUs. This is not an apple-to-apple comparison as KILT used Nvidia Nvidia A1000 GPU and MIL was run on Nvidia RTX 4090 GPU. For CPUs, KILT was run on a [24-core Dell server](https://github.com/mlcommons/inference_results_v3.1/blob/main/closed/Krai/systems/7920t-kilt-onnxruntime_cpu.json) with peak frequency of 4000 MHz whereas MIL was run on a [16 core PCSPECIALIST custom workstation](https://github.com/mlcommons/inference_results_v3.1/blob/main/closed/CTuning/systems/amd_ryzen_workstation-cpp-cpu-onnxruntime-vdefault-default_config.json) with peak frequency of 5900 MHz. + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/6d73360a-27ab-4158-b4cc-a5724d6d4c73) + +![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d6b5516b-4861-4355-badf-65decbf8d3b0) + diff --git a/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json b/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json new file mode 100644 index 0000000000..4860af17b2 --- /dev/null +++ b/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json @@ -0,0 +1,16 @@ +{ + "alias": "mlperf-inference-v3.1-analysis-ctuning", + "automation_alias": "report", + "automation_uid": "6462ecdba2054467", + "date":"20230917", + "title":"cTuning's analysis of MLPerf inference v3.1 community results", + "tags": [ + "mlperf", + "inference", + "mlperf-inference", + "v3.1", + "analysis", + "ctuning" + ], + "uid": "ebc483653dbc45b6" +} diff --git a/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json b/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json new file mode 100644 index 0000000000..99d0370a50 --- /dev/null +++ b/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json @@ -0,0 +1,17 @@ +{ + "alias": "mlperf-inference-v3.1-press-release-ctuning", + "automation_alias": "report", + "automation_uid": "6462ecdba2054467", + "date": "20230913", + "redirect": "https://www.linkedin.com/pulse/new-milestone-make-mlperf-benchmarks-accessible-everyone-fursin", + "tags": [ + "mlperf", + "inference", + "mlperf-inference", + "v3.1", + "analysis", + "ctuning" + ], + "title": "cTuning press-release about making MLPerf inference accessible to everyone", + "uid": "85ff4a6ac203411e" +} diff --git a/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json b/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json new file mode 100644 index 0000000000..159a986735 --- /dev/null +++ b/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json @@ -0,0 +1,17 @@ +{ + "alias": "mlperf-inference-v3.1-press-release-hpcwire", + "automation_alias": "report", + "automation_uid": "6462ecdba2054467", + "date": "20230913", + "tags": [ + "mlperf", + "inference", + "mlperf-inference", + "v3.1", + "analysis", + "ctuning" + ], + "redirect": "https://www.hpcwire.com/2023/09/13/mlperf-releases-latest-inference-results-and-new-storage-benchmark", + "title": "HPCWire about MLPerf inference v3.1 and storage results (with cTuning/cKnowledge coverage)", + "uid": "50960565640142d6" +} diff --git a/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json b/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json new file mode 100644 index 
0000000000..15c3fa6c42 --- /dev/null +++ b/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json @@ -0,0 +1,17 @@ +{ + "alias": "mlperf-inference-v4.0-press-release-ctuning", + "automation_alias": "report", + "automation_uid": "6462ecdba2054467", + "date": "20230913", + "redirect": "https://www.linkedin.com/pulse/new-cm-mlperf-automation-helps-benchmark-commodity-hardware-fursin-61noe", + "tags": [ + "mlperf", + "inference", + "mlperf-inference", + "v4.0", + "analysis", + "ctuning" + ], + "title": "cTuning press-release about a new version of the CM workflow to automate MLPerf", + "uid": "acc35b8e9ed14c98" +} diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index ffb4a26b86..6e95a00827 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -66,8 +66,6 @@ input_mapping: tp_size: CM_NVIDIA_TP_SIZE use_dataset_from_host: CM_USE_DATASET_FROM_HOST -predeps: False - # Duplicate CM environment variables to the ones used in native apps env_key_mappings: CM_HOST_: HOST_ diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 5b9d4b1512..cf390bc3ab 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -29,7 +29,6 @@ tags: - reference tags_help: "run-mlperf,inference" -predeps: False default_env: CM_MLPERF_IMPLEMENTATION: reference diff --git a/tests/script/check.py b/tests/script/check.py new file mode 100644 index 0000000000..dd030c3bb8 --- /dev/null +++ b/tests/script/check.py @@ -0,0 +1,27 @@ +def check_return(r): + if 'return' not in r: + raise Exception( + 'CM access function should always return key \'return\'!') + if 'error' in r: + raise Exception(r['error']) + + +def check_list(r, string, found=True): + check_return(r) + if 'list' not in r: + raise Exception('CM search should return a list!') + if len(r['list']) < 1 and found: + raise Exception('CM search returned an empty list for ' + string) + if len(r['list']) > 0 and not found: + raise Exception('CM search returned at lease one entry for ' + string) + + +def check_key_value(d, key, value, absent_ok=False): + if not d.get(key): + if absent_ok: + return True + else: + raise Exception(f"{key} is missing. Current values are {d}") + elif d[key] != value: + raise Exception( + f"{key} is not having the expected value of {value}. 
Current value is {d[key]}") diff --git a/tests/script/process_dockerfile.py b/tests/script/process_dockerfile.py new file mode 100644 index 0000000000..d9abd2abeb --- /dev/null +++ b/tests/script/process_dockerfile.py @@ -0,0 +1,33 @@ +import sys +import os +import cmind as cm +import check as checks +import json +import yaml + +files = sys.argv[1:] + +for file in files: + if not os.path.isfile(file): + continue + if not file.endswith("_cm.json") and not file.endswith("_cm.yaml"): + continue + if not file.startswith(os.path.join("cm-mlops", "script")): + continue + + script_path = os.path.dirname(file) + + f = open(file) + + if file.endswith(".json"): + data = json.load(f) + elif file.endswith(".yaml"): + data = yaml.safe_load(f) + + uid = data['uid'] + + r = cm.access({'action': 'dockerfile', + 'automation': 'script', + 'artifact': uid, + 'quiet': 'yes'}) + checks.check_return(r) diff --git a/tests/script/process_readme.py b/tests/script/process_readme.py new file mode 100644 index 0000000000..de7e04033e --- /dev/null +++ b/tests/script/process_readme.py @@ -0,0 +1,27 @@ +import sys +import os +import cmind as cm +import check as checks +import json +import yaml + +files = sys.argv[1:] + +for file in files: + if not os.path.isfile(file): + continue + if not file.endswith("_cm.json") and not file.endswith("_cm.yaml"): + continue + if not file.startswith(os.path.join("cm-mlops", "script")): + continue + script_path = os.path.dirname(file) + f = open(file) + if file.endswith(".json"): + data = json.load(f) + elif file.endswith(".yaml"): + data = yaml.safe_load(f) + uid = data['uid'] + + r = cm.access({'action': 'doc', 'automation': 'script', + 'artifact': uid, 'quiet': 'yes'}) + checks.check_return(r) diff --git a/tests/script/process_tests.py b/tests/script/process_tests.py new file mode 100644 index 0000000000..8012d097b6 --- /dev/null +++ b/tests/script/process_tests.py @@ -0,0 +1,38 @@ +import sys +import os +import cmind as cm +import check as checks +import json +import yaml + +files = sys.argv[1:] + +for file in files: + print(file) + if not os.path.isfile(file) or not "script" in file: + continue + if not file.endswith("_cm.json") and not file.endswith("_cm.yaml"): + continue + script_path = os.path.dirname(file) + f = open(file) + if file.endswith(".json"): + data = json.load(f) + elif file.endswith(".yaml"): + data = yaml.safe_load(f) + if data.get('uid', '') == '': + continue # not a CM script meta + uid = data['uid'] + + ii = { + 'action': 'test', 'automation': 'script', 'artifact': uid, 'quiet': 'yes', 'out': 'con' + } + if os.environ.get('DOCKER_CM_REPO', '') != '': + ii['docker_cm_repo'] = os.environ['DOCKER_CM_REPO'] + if os.environ.get('DOCKER_CM_REPO_BRANCH', '') != '': + ii['docker_cm_repo_branch'] = os.environ['DOCKER_CM_REPO_BRANCH'] + if os.environ.get('TEST_INPUT_INDEX', '') != '': + ii['test_input_index'] = os.environ['TEST_INPUT_INDEX'] + print(ii) + r = cm.access(ii) + + checks.check_return(r) diff --git a/tests/script/test_deps.py b/tests/script/test_deps.py new file mode 100644 index 0000000000..37d75fd4c4 --- /dev/null +++ b/tests/script/test_deps.py @@ -0,0 +1,25 @@ +# This test covers version, variation, compilation from src, add_deps, +# add_deps_recursive, deps, post_deps + +import cmind as cm +import check as checks + +# MLPerf v3.0 inference is now very outdated and we are testing inference +# in separate tests + +# r = cm.access({'action':'run', 'automation':'script', 'tags': 'generate-run-cmds,mlperf', 'adr': +# {'loadgen': {'version': 'r3.0'}, 
'compiler': {'tags': "gcc"}}, 'env': {'CM_MODEL': 'resnet50', +# 'CM_DEVICE': 'cpu', 'CM_BACKEND': 'onnxruntime'}, 'quiet': 'yes'}) +# checks.check_return(r) +# +# r = cm.access({'action':'search', 'automation': 'cache', 'tags': 'loadgen,version-r3.0,deps-python-non-virtual'}) +# checks.check_list(r, "loadgen,version-r3.0,deps-python-non-virtual") +# +# r = cm.access({'action':'search', 'automation': 'cache', 'tags': 'inference,src,version-r3.0'}) +# checks.check_list(r, "inference,src,version-r3.0") +# +# r = cm.access({'action':'run', 'automation':'script', 'tags': 'app,mlperf,inference,generic,_python,_resnet50,_onnxruntime,_cpu,_r3.0_default', 'adr': {'mlperf-implementation': { 'version': 'master'}}, 'quiet': 'yes'}) +# checks.check_return(r) +# +# r = cm.access({'action':'run', 'automation':'script', 'tags': 'app,mlperf,inference,generic,_python,_resnet50,_tf,_cpu,_r3.0_default', 'adr': {'mlperf-implementation': { 'version': 'master'}}, 'quiet': 'yes'}) +# checks.check_return(r) diff --git a/tests/script/test_docker.py b/tests/script/test_docker.py new file mode 100644 index 0000000000..991ef04030 --- /dev/null +++ b/tests/script/test_docker.py @@ -0,0 +1,23 @@ +# This test covers version, variation, compilation from src, +# add_deps_recursive, post_deps + +import cmind as cm +import check as checks + +r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'run,docker,container', + 'add_deps_recursive': { + 'compiler': {'tags': "gcc"} + }, + 'image_name': 'cm-script-app-image-classification-onnx-py', + 'env': { + 'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python', + 'CM_MLOPS_REPO': 'mlcommons@cm4mlops', + 'CM_MLOPS_REPO_BRANCH': 'mlperf-inference', + 'CM_DOCKER_IMAGE_BASE': 'ubuntu:22.04' + }, + 'quiet': 'yes' + }) + +checks.check_return(r) diff --git a/tests/script/test_features.py b/tests/script/test_features.py new file mode 100644 index 0000000000..b29ee6a7a5 --- /dev/null +++ b/tests/script/test_features.py @@ -0,0 +1,38 @@ +# This test covers +# 1. python-virtual-env and update_deps inside customize.py +# 2. 
cache search using "-" prefix + +import cmind as cm +import check as checks + +r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'install,python-venv', + 'name': 'test', + 'quiet': 'yes'}) +checks.check_return(r) + +r = cm.access({'action': 'search', 'automation': 'cache', + 'tags': 'get,python,virtual,name-test'}) +checks.check_list(r, "get,python-venv") + +r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'get,dataset,preprocessed,imagenet,_NHWC', + 'quiet': 'yes'}) +checks.check_return(r) + +r = cm.access({'action': 'search', 'automation': 'cache', + 'tags': 'get,dataset,preprocessed,imagenet,-_NCHW'}) +checks.check_list(r, "_NHWC") + +r = cm.access({'action': 'search', 'automation': 'cache', + 'tags': 'get,dataset,preprocessed,imagenet,-_NHWC'}) +# checks.check_list(r, "-_NHWC", False) + + +r = cm.access({'action': 'run', 'automation': 'script', + 'tags': 'test-scripts,_v1,_v2'}) +new_env = r['new_env'] +checks.check_key_value(new_env, "CM_VAR1", "combv1v2") +checks.check_key_value(new_env, "CM_VAR2", "constv2") diff --git a/tests/script/test_install.py b/tests/script/test_install.py new file mode 100644 index 0000000000..d4fb93ec70 --- /dev/null +++ b/tests/script/test_install.py @@ -0,0 +1,15 @@ +# This test covers script installation, version, shared library install + +import cmind as cm +import check as checks + +r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'python,src,install,_shared', + 'version': '3.9.10', + 'quiet': 'true'}) +checks.check_return(r) + +r = cm.access({'action': 'search', 'automation': 'cache', + 'tags': 'python,src,install,_shared,version-3.9.10'}) +checks.check_list(r, "python,src,install,_shared,version-3.9.10") diff --git a/tests/test_cm.py b/tests/test_cm.py new file mode 100644 index 0000000000..821e1571d6 --- /dev/null +++ b/tests/test_cm.py @@ -0,0 +1,17 @@ +try: + import cmind as cm + + r = cm.access(['test', 'script']) + if 'return' not in r: + raise Exception( + 'CM access function should always return key \'return\'!') + exit(0) + +except ImportError as e: + from sys import stderr + from subprocess import call + print( + 'WARNING: CM module for python is not installed & jupyter notebooks will not be supported', + file=stderr) + retcode = call(['cm', 'test', 'script']) + exit(retcode) diff --git a/tests/test_search_speed.py b/tests/test_search_speed.py new file mode 100644 index 0000000000..577c4f0b80 --- /dev/null +++ b/tests/test_search_speed.py @@ -0,0 +1,26 @@ +import cmind as cm +import time + +times = [] + +steps = 10 + +print('Running search with tags {} times ...'.format(steps)) + +for step in range(steps): + + start = time.time() + r = cm.access({'action': 'search', + 'automation': 'script', + 'tags': 'detect,os'}) + timer = time.time() - start + + if r['return'] > 0: + cm.error(r) + + times.append(timer) + +step = 0 +for t in times: + step += 1 + print("{}) {:0.3f} sec.".format(step, t)) diff --git a/tests/tutorials/test_tutorial_retinanet.py b/tests/tutorials/test_tutorial_retinanet.py new file mode 100644 index 0000000000..dcca78f205 --- /dev/null +++ b/tests/tutorials/test_tutorial_retinanet.py @@ -0,0 +1,37 @@ +# This test covers version, variation, compilation from src, add_deps, +# add_deps_recursive, deps, post_deps + +import check as checks +import cmind as cm +from pathlib import Path +import sys +import os + +sys.path.insert( + 1, + os.path.join( + Path(__file__).parent.parent.resolve(), + "script")) + +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 
'app,mlperf,inference,generic,_cpp,_retinanet,_onnxruntime,_cpu', 'adr': + {'python': {'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'scenario': 'Offline', + 'mode': 'accuracy', 'test_query_count': '10', 'rerun': 'true', 'quiet': 'yes'}) +checks.check_return(r) + +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'app,mlperf,inference,generic,_cpp,_retinanet,_onnxruntime,_cpu', 'adr': + {'python': {'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'scenario': 'Offline', + 'mode': 'performance', 'test_query_count': '10', 'rerun': 'true', 'quiet': 'yes'}) +checks.check_return(r) + +r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'install,python-venv', + 'version': '3.10.8', + 'name': 'mlperf'}) +checks.check_return(r) + +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': + {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'submitter': 'Community', + 'implementation': 'cpp', 'hw_name': 'default', 'model': 'retinanet', 'backend': 'onnxruntime', 'device': 'cpu', 'scenario': 'Offline', + 'test_query_count': '10', 'clean': 'true', 'quiet': 'yes'}) +checks.check_return(r) diff --git a/tests/tutorials/test_tutorial_tvm.py b/tests/tutorials/test_tutorial_tvm.py new file mode 100644 index 0000000000..6901a31693 --- /dev/null +++ b/tests/tutorials/test_tutorial_tvm.py @@ -0,0 +1,28 @@ +# This test covers version, variation, compilation from src, add_deps, +# add_deps_recursive, deps, post_deps + +import check as checks +import cmind as cm + +from pathlib import Path +import sys +import os + +sys.path.insert( + 1, + os.path.join( + Path(__file__).parent.parent.resolve(), + "script")) + +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds', 'adr': + {'python': {'name': 'mlperf', 'version_min': '3.8'}}, 'submitter': 'Community', + 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', + 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) +checks.check_return(r) + + +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission', 'adr': + {'python': {'name': 'mlperf', 'version_min': '3.8'}}, 'submitter': 'Community', + 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', + 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'}) +checks.check_return(r) diff --git a/tests/tutorials/test_tutorial_tvm_pip_ge.py b/tests/tutorials/test_tutorial_tvm_pip_ge.py new file mode 100644 index 0000000000..f95521f440 --- /dev/null +++ b/tests/tutorials/test_tutorial_tvm_pip_ge.py @@ -0,0 +1,26 @@ +import check as checks +import cmind as cm + +from pathlib import Path +import sys +import os + +sys.path.insert( + 1, + os.path.join( + Path(__file__).parent.parent.resolve(), + "script")) + +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds', 'adr': + {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': { + 'tags': '_pip-install'}, 'tvm-model': {'tags': '_graph_executor'}}, + 'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 
'tvm-onnx', + 'device': 'cpu', 'scenario': 'Offline', 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) +checks.check_return(r) + +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': + {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': { + 'tags': '_pip-install'}, 'tvm-model': {'tags': '_graph_executor'}}, + 'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', + 'device': 'cpu', 'scenario': 'Offline', 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'}) +checks.check_return(r) diff --git a/tests/tutorials/test_tutorial_tvm_pip_vm.py b/tests/tutorials/test_tutorial_tvm_pip_vm.py new file mode 100644 index 0000000000..ab9244bf01 --- /dev/null +++ b/tests/tutorials/test_tutorial_tvm_pip_vm.py @@ -0,0 +1,27 @@ +# This test covers version, variation, compilation from src, add_deps, +# add_deps_recursive, deps, post_deps + +import check as checks +import cmind as cm + +from pathlib import Path +import sys +import os + +sys.path.insert( + 1, + os.path.join( + Path(__file__).parent.parent.resolve(), + "script")) + +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds', 'adr': + {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {'tags': '_pip-install'}}, 'submitter': 'Community', + 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', + 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) +checks.check_return(r) + +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': + {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {'tags': '_pip-install'}}, 'submitter': 'Community', + 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', + 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'}) +checks.check_return(r)