diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index c6937bac3a..66a1318632 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -26,16 +26,22 @@ jobs: python3 -m pip install autopep8 for FILE in $(git diff --name-only ${{ github.event.before }} | grep -E '.*\.py$') do - autopep8 --in-place -a $FILE - git add $FILE + # Check if the file still exists in the working tree + if [ -f "$FILE" ]; then + autopep8 --in-place -a "$FILE" + git add "$FILE" + fi done - name: Format modified C++ files run: | for FILE in $(git diff --name-only ${{ github.event.before }} | grep -E '.*\.(cc|cpp|h|hpp)$') do - clang-format -i -style=file $FILE - git add $FILE + # Check if the file still exists in the working tree + if [ -f "$FILE" ]; then + clang-format -i -style=file "$FILE" + git add "$FILE" + fi done - name: Commit and create PR diff --git a/automation/script/README-extra.md b/automation/script/README-extra.md index d63c5dc161..7fc9820677 100644 --- a/automation/script/README-extra.md +++ b/automation/script/README-extra.md @@ -2,6 +2,7 @@ # CM "script" automation +
Click here to see the table of contents. diff --git a/automation/script/module.py b/automation/script/module.py index d63f607710..ee06a2c7b2 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -1311,7 +1311,7 @@ def _run(self, i): r = self._call_run_deps(prehook_deps, self.local_env_keys, local_env_keys_from_meta, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, - remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1372,7 +1372,7 @@ def _run(self, i): r = self._call_run_deps(posthook_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, - remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1383,7 +1383,7 @@ def _run(self, i): # Check chain of post dependencies on other CM scripts r = self._call_run_deps(post_deps, self.local_env_keys, clean_env_keys_post_deps, env, state, const, const_state, add_deps_recursive, recursion_spaces + extra_recursion_spaces, - remembered_selections, variation_tags_string, found_cached, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) + remembered_selections, variation_tags_string, True, debug_script_tags, verbose, show_time, extra_recursion_spaces, run_state) if r['return'] > 0: return r @@ -1605,6 +1605,82 @@ def _run(self, i): if r['return'] > 0: return r + # Prepare common input to prepare and run script + run_script_input = { + 'path': path, + 'bat_ext': bat_ext, + 
'os_info': os_info, + 'const': const, + 'state': state, + 'const_state': const_state, + 'reuse_cached': reuse_cached, + 'recursion': recursion, + 'recursion_spaces': recursion_spaces, + 'remembered_selections': remembered_selections, + 'tmp_file_run_state': self.tmp_file_run_state, + 'tmp_file_run_env': self.tmp_file_run_env, + 'tmp_file_state': self.tmp_file_state, + 'tmp_file_run': self.tmp_file_run, + 'local_env_keys': self.local_env_keys, + 'local_env_keys_from_meta': local_env_keys_from_meta, + 'posthook_deps': posthook_deps, + 'add_deps_recursive': add_deps_recursive, + 'remembered_selections': remembered_selections, + 'found_script_tags': found_script_tags, + 'variation_tags_string': variation_tags_string, + 'found_cached': False, + 'debug_script_tags': debug_script_tags, + 'verbose': verbose, + 'meta': meta, + 'self': self + } + + # Check if pre-process and detect + if str(meta.get('predeps', 'True')).lower() not in ["0", "false", "no"] and os.path.isfile( + path_to_customize_py): # possible duplicate execution - needs fix + r = utils.load_python_module( + {'path': path, 'name': 'customize'}) + if r['return'] > 0: + return r + + customize_code = r['code'] + + customize_common_input = { + 'input': i, + 'automation': self, + 'artifact': script_artifact, + 'customize': script_artifact.meta.get('customize', {}), + 'os_info': os_info, + 'recursion_spaces': recursion_spaces, + 'script_tags': script_tags, + 'variation_tags': variation_tags + } + run_script_input['customize_code'] = customize_code + run_script_input['customize_common_input'] = customize_common_input + + if repro_prefix != '': + run_script_input['repro_prefix'] = repro_prefix + if ignore_script_error: + run_script_input['ignore_script_error'] = True + if 'predeps' in dir(customize_code) and not fake_run: + + logging.debug( + recursion_spaces + + ' - Running preprocess ...') + + run_script_input['run_state'] = run_state + + ii = copy.deepcopy(customize_common_input) + ii['env'] = env + ii['state'] = 
state + ii['meta'] = meta + # may need to detect versions in multiple paths + ii['run_script_input'] = run_script_input + + r = customize_code.predeps(ii) + if r['return'] > 0: + return r + # Check chain of dependencies on other CM scripts if len(deps) > 0: logging.debug(recursion_spaces + @@ -1626,6 +1702,8 @@ def _run(self, i): # Clean some output files clean_tmp_files(clean_files, recursion_spaces) + # Repeated code + ''' # Prepare common input to prepare and run script run_script_input = { 'path': path, @@ -1655,6 +1733,7 @@ def _run(self, i): 'meta': meta, 'self': self } + ''' if os.path.isfile( path_to_customize_py): # possible duplicate execution - needs fix r = utils.load_python_module( diff --git a/challenge/add-derived-metrics-to-mlperf-inference/README.md b/challenge/add-derived-metrics-to-mlperf-inference/README.md deleted file mode 100644 index 8302f63d68..0000000000 --- a/challenge/add-derived-metrics-to-mlperf-inference/README.md +++ /dev/null @@ -1,32 +0,0 @@ -### Challenge - -Check past MLPerf inference results in [this MLCommons repository](https://github.com/mlcommons/cm4mlperf-results) -and add derived metrics such as result/No of cores, power efficiency, device cost, operational costs, etc. - -Add clock speed as a third dimension to graphs and improve Bar graph visualization. - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. 
- - -### Prizes - -* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* - - -### Organizers - -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at official [MLCommons website](https://mlcommons.org). diff --git a/challenge/add-derived-metrics-to-mlperf-inference/_cm.json b/challenge/add-derived-metrics-to-mlperf-inference/_cm.json deleted file mode 100644 index cbdc212467..0000000000 --- a/challenge/add-derived-metrics-to-mlperf-inference/_cm.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "alias": "add-derived-metrics-to-mlperf-inference", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close_extension": true, - "date_open": "20240204", - "points": 2, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "benchmark", - "automate", - "derived-metrics", - "mlperf-inference", - "mlperf-inference-derived-metrics" - ], - "title": "Add derived metrics to MLPerf inference benchmarks (power efficiency, results / No of cores, costs, etc)", - "trophies": true, - "uid": "c65b56d7770946ee" -} diff --git a/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md deleted file mode 100644 index a2059c0fe8..0000000000 --- a/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/README.md +++ /dev/null 
@@ -1,4 +0,0 @@ -20240220: -* A prototype of a GUI to generate CM commands to run MLPerf inference benchmarks is ready: [link](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725) -* A prototype of the infrastructure to reproduce MLPerf inference benchmark results is ready: [link](https://access.cknowledge.org/playground/?action=reproduce) -* On-going efforts: https://github.com/mlcommons/ck/issues/1052 diff --git a/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml b/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml deleted file mode 100644 index b8b519d27f..0000000000 --- a/challenge/automate-mlperf-inference-v3.1-and-v4.0-2024/_cm.yaml +++ /dev/null @@ -1,21 +0,0 @@ -alias: automate-mlperf-inference-v3.1-and-v4.0-2024 -uid: f89f152fc2614240 - -automation_alias: challenge -automation_uid: 3d84abd768f34e08 - -title: Add MLCommons CM workflows and unifed interface to automate MLPerf inference v3.1 and v4.0 benchmarks (Intel, Nvidia, Qualcomm, Arm64, TPU ...) - -date_open: '20231215' -date_close: '20240315' - -hot: true - -tags: -- automate -- mlperf-inference-v3.1-and-v4.0 -- 2024 - -experiments: -- tags: mlperf-inference,v3.1 -- tags: mlperf-inference,v4.0 diff --git a/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md deleted file mode 100644 index adfbea7263..0000000000 --- a/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/README.md +++ /dev/null @@ -1,10 +0,0 @@ -This challenge is under preparation. You can read about the motivation behind this challenge in our [invited talk at MLPerf-Bench @ HPCA'24](https://doi.org/10.5281/zenodo.10786893). 
- -We plan to extend [MLCommons CM framework](https://github.com/mlcommons/ck) -to automatically compose high-performance and cost-efficient AI systems -based on MLPerf inference v4.0 results and [CM automation recipes](https://access.cknowledge.org/playground/?action=scripts). - -* A prototype of a GUI to generate CM commands to run MLPerf inference benchmarks is ready: [link](https://access.cknowledge.org/playground/?action=howtorun&bench_uid=39877bb63fb54725) -* A prototype of the infrastructure to reproduce MLPerf inference benchmark results is ready: [link](https://access.cknowledge.org/playground/?action=reproduce) - -Contact the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) for more details. diff --git a/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml b/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml deleted file mode 100644 index b1d4fe9f18..0000000000 --- a/challenge/compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024/_cm.yaml +++ /dev/null @@ -1,25 +0,0 @@ -alias: compose-high-performance-and-cost-efficient-ai-systems-based-on-mlperf-4.0-2024 -uid: 7c983102d89e4869 - -automation_alias: challenge -automation_uid: 3d84abd768f34e08 - -title: "Compose high-performance and cost-efficint AI systems using MLCommons' Collective Mind and MLPerf inference" - -date_open: '20240101' - -tags: -- compose -- ai -- systems -- mlperf-inference-v4.0 -- cm -- mlcommons-cm -- mlperf -- v4.0 -- performance -- energy -- cost - -experiments: -- tags: mlperf-inference,v4.0 diff --git a/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md deleted file mode 100644 index 306341271c..0000000000 --- a/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/README.md +++ /dev/null @@ 
-1,30 +0,0 @@ -### Challenge - -Connect CM workflows to run MLPerf inference benchmarks with [OpenBenchmarking.org](https://openbenchmarking.org). - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. - - -### Prizes - -* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* - - - -### Organizers - -* Michael Larabel -* Grigori Fursin -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - -### Results - -Results will be available at [OpenBenchmark.org](https://openbenchmarking.org) -and [MLCommons CK playgronud](https://access.cknowledge.org/playground/?action=experiments). 
diff --git a/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json b/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json deleted file mode 100644 index c1e65aadbd..0000000000 --- a/challenge/connect-mlperf-inference-v3.1-with-openbenchmarking/_cm.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "alias": "connect-mlperf-inference-v3.1-with-openbenchmarking", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_open": "20240101", - "date_close_extension": true, - "points": 2, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "benchmark", - "automate", - "openbenchmarking", - "mlperf-inference", - "mlperf-inference-openbenchmarking" - ], - "title": "Run MLPerf inference benchmarks using CM via OpenBenchmarking.org", - "trophies": true, - "uid": "534592626eb44efe" -} diff --git a/challenge/connect-mlperf-with-medperf/README.md b/challenge/connect-mlperf-with-medperf/README.md deleted file mode 100644 index f2f572bd48..0000000000 --- a/challenge/connect-mlperf-with-medperf/README.md +++ /dev/null @@ -1,23 +0,0 @@ -### Challenge - -Evaluate models from [MLCommons MedPerf platform](https://www.medperf.org) in terms of latency, throughput, power consumption and other metrics -using MLPerf loadgen and MLCommons CM automation language. - -See the [Nature 2023 article about MedPerf](https://www.nature.com/articles/s42256-023-00652-2) -and [ACM REP'23 keynote about CM](https://doi.org/10.5281/zenodo.8105339) to learn more about these projects. - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. 
- - -### Prizes - -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* - - -### Organizers - -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) diff --git a/challenge/connect-mlperf-with-medperf/_cm.json b/challenge/connect-mlperf-with-medperf/_cm.json deleted file mode 100644 index d48d0a9fea..0000000000 --- a/challenge/connect-mlperf-with-medperf/_cm.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "alias": "connect-mlperf-with-medperf", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close_extension": true, - "date_open": "20240105", - "points": 2, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "benchmark", - "automate", - "medperf", - "mlperf-inference", - "mlperf-inference-medperf", - "mlperf-inference-medperf", - "mlperf-inference-medperf-v3.1", - "mlperf-inference-medperf-v3.1-2023", - "v3.1" - ], - "title": "Connect MedPerf with MLPerf and CM", - "trophies": true, - "uid": "c26d1fbf89164728" -} diff --git a/challenge/optimize-mlperf-inference-scc2023/README.md b/challenge/optimize-mlperf-inference-scc2023/README.md deleted file mode 100644 index 62a4826ad2..0000000000 --- a/challenge/optimize-mlperf-inference-scc2023/README.md +++ /dev/null @@ -1,16 +0,0 @@ -### CM tutorial - -https://github.com/mlcommons/ck/blob/master/docs/tutorials/scc23-mlperf-inference-bert.md - -### Challenge - -Reproduce and optimize MLPerf inference benchmarks during Student Cluster Competition at SuperComputing'23. - -See our [related challange from 2022]()https://access.cknowledge.org/playground/?action=challenges&name=repro-mlperf-inference-retinanet-scc2022). 
- -### Organizers - -* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning foundation](https://cTuning.org) -* [cKnowledge.org](https://cKnowledge.org) - diff --git a/challenge/optimize-mlperf-inference-scc2023/_cm.json b/challenge/optimize-mlperf-inference-scc2023/_cm.json deleted file mode 100644 index 021872b15a..0000000000 --- a/challenge/optimize-mlperf-inference-scc2023/_cm.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-scc2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20231115", - "date_open": "20230915", - "tags": [ - "automate", - "modularize", - "reproduce", - "replicate", - "benchmark", - "mlperf", - "mlperf-inference", - "mlperf-inference-scc", - "mlperf-inference-scc-2023" - ], - "title": "Reproduce and optimize MLPerf inference v3.1 benchmarks at the Student Cluster Competition'23 at SuperComputing'23 using CM", - "uid": "ddaf594f84b14bc2" -} diff --git a/challenge/optimize-mlperf-inference-scc2024/README.md b/challenge/optimize-mlperf-inference-scc2024/README.md deleted file mode 100644 index 1f9be23af2..0000000000 --- a/challenge/optimize-mlperf-inference-scc2024/README.md +++ /dev/null @@ -1,7 +0,0 @@ -The [MLCommons](https://mlcommons.org), [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org) -are preparing a unified interface to run MLPerf inference benchmark at the Student Cluster Competition'24. - -See [the CM-MLPerf tutorial for SCC'23](https://github.com/mlcommons/ck/blob/master/docs/tutorials/scc23-mlperf-inference-bert.md). -Note that the MLPerf model will change in SCC'24 - please stay tuned for more details! - -See https://sc24.supercomputing.org/students/student-cluster-competition for more details about SCC. 
diff --git a/challenge/optimize-mlperf-inference-scc2024/_cm.json b/challenge/optimize-mlperf-inference-scc2024/_cm.json deleted file mode 100644 index ab75aa27a6..0000000000 --- a/challenge/optimize-mlperf-inference-scc2024/_cm.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-scc2024", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_open": "20241001", - "tags": [ - "automate", - "modularize", - "reproduce", - "replicate", - "benchmark", - "mlperf", - "mlperf-inference", - "mlperf-inference-scc", - "mlperf-inference-scc-2024" - ], - "title": "Run and optimize the MLPerf inference benchmark using CM at the Student Cluster Competition'24 at SuperComputing'24", - "uid": "f7fcba4c43ab4412" -} diff --git a/challenge/optimize-mlperf-inference-v2.1-2022/README.md b/challenge/optimize-mlperf-inference-v2.1-2022/README.md deleted file mode 100644 index d0ac7cf15b..0000000000 --- a/challenge/optimize-mlperf-inference-v2.1-2022/README.md +++ /dev/null @@ -1,18 +0,0 @@ -### Challenge - -Prepare, optimize and reproduce MLPerf inference v2.1 benchmarks across diverse implementations, software and hardware -using the [MLCommons CK framework](https://github.com/mlcommons/ck). - -### Organizers - -* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning foundation](https://cTuning.org) -* [OctoML](https://octoml.ai) - -### Status - -This challenge has been successfully completed. - -### Results - -Results are available [here](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,v2.1). 
diff --git a/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json b/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json deleted file mode 100644 index 31cb5dffd2..0000000000 --- a/challenge/optimize-mlperf-inference-v2.1-2022/_cm.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v2.1-2022", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20220901", - "date_open": "20220701", - "experiments": [ - { - "tags": "mlperf-inference,v2.1" - } - ], - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "mlperf", - "mlperf-inference", - "mlperf-inference-v2.1", - "mlperf-inference-v2.1-2022", - "v2.1" - ], - "title": "Run and optimize MLPerf inference v2.1 benchmarks", - "uid": "2e13154b7fbb412d" -} diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/README.md b/challenge/optimize-mlperf-inference-v3.0-2023/README.md deleted file mode 100644 index da6decc8c7..0000000000 --- a/challenge/optimize-mlperf-inference-v3.0-2023/README.md +++ /dev/null @@ -1,74 +0,0 @@ -### Challenge - -Run MLPerf inference v3.0 benchmarks out-of-the-box across diverse implementations, software and hardware -using the [MLCommons CM automation language](https://github.com/mlcommons/ck) -and submit public results to the MLPerf inference v3.0 via [cTuning foundation](https://cTuning.org). - -* [GUI to run MLPerf inference benchmarks](https://cknowledge.org/mlperf-inference-gui) -* [GUI to prepare MLPerf inference submissions](https://cknowledge.org/mlperf-inference-submission-gui) - -### Organizers - -* [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) -* [cTuning foundation](https://cTuning.org) -* [cKnowledge](https://cKnowledge.org) - -### Status - -This challenge has been successfully completed. 
- -### Results - -Official results: -* https://github.com/mlcommons/inference_results_v3.0/tree/main/closed/cTuning -* https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning - -Results in the MLCommons CK/CM format: -* https://github.com/mlcommons/cm4mlperf-results - -Visualization and comparison with derived metrics: -* [MLCommons Collective Knowledge Playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-inference,v3.0). - -### The outcome - -We are very pleased to announce the successful outcome of the 1st -community challenge to run, reproduce and optimize MLPerf inference v3.0 -benchmarks: our MLCommons CK/CM workflow automation framework has helped -to prepare more than 80% of all submission results including 98% of power -results with very diverse technology and benchmark implementations from -Neural Magic, Qualcomm, cKnowledge Ltd, KRAI, cTuning foundation, Dell -Technologies, Hewlett Packard Enterprise, Lenovo, Hugging Face, NVIDIA, -Intel Corporation, AMD and Apple across diverse CPUs, GPUs and DSPs with -PyTorch, ONNX, QAIC, TF/TFLite, TVM and TensorRT using popular cloud -providers (GCP, AWS, Azure) and individual servers and edge devices -provided by our [volunteers](https://access.cknowledge.org/playground/?action=contributors). - -You can now see and compare all MLPerf inference results v3.0, v2.1 and -v2.0 online together with reproducibility reports including the -[MLPerf BERT model](https://huggingface.co/ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1) -from the [Hugging Face Zoo](https://www.linkedin.com/company/huggingface/?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D) -on [Nvidia Jetson Orin platform](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md#reproducing-the-nvidia-jetson-agx-orin-submission). 
-You can even create your own derived metrics (such as performance per Watt), -provide your own constraints using this [MLCommons repository](https://github.com/mlcommons/cm_inference_results) and visualize -them as shown in [this example](https://access.cknowledge.org/playground/?action=experiments&name=e472410ee67c41f9&x=Result&y=Power_Efficiency&filter=result[%27Result_Power%27]%3C35&derived_metrics=result%5B%27Power_Efficiency%27%5D%3D1000%2Fresult%5B%27Result_Power%27%5D&c=accelerator_model_name&axis_key_s=version). - -Additional thanks to [Michael Goin](https://www.linkedin.com/in/michael-goin) -from [Neural Magic](https://www.linkedin.com/company/neural-magic/?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), our international -students including [Himanshu Dutta](https://www.linkedin.com/in/ACoAACpPCiMB7zUNStsqBmaOCtd100a7wXBGu_M?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), -[Aditya Kumar Shaw](https://www.linkedin.com/in/ACoAACJ3ikUBjuHqi35ibm8CG6IEYv-v_VsobIs?lipi=urn%3Ali%3Apage%3Ad_flagship3_pulse_read%3B4CDUdiVxT7WqLJNXO%2BI5bQ%3D%3D), -Sachin Mudaliyar, [Thomas Zhu](https://www.linkedin.com/in/hanwen-zhu-483614189), -and all [CK/CM users and contributors](https://github.com/mlcommons/ck/blob/master/CONTRIBUTING.md) for helping us to -validate, use and improve this open-source technology to automate -benchmarking and optimization of AI/ML systems in terms of performance, -accuracy, power and costs! We are also grateful to [HiPEAC](https://www.linkedin.com/company/hipeac) -and [OctoML](https://www.linkedin.com/company/octoml) for -sponsoring initial development and Peter Mattson, David Kanter, Vijay -Janapa Reddi and Alexandros Karargyris for fruitful discussions. 
- - -### Dissemination - -* [Forbes article](https://www.forbes.com/sites/karlfreund/2023/04/05/nvidia-performance-trounces-all-competitors-who-have-the-guts-to-submit-to-mlperf-inference-30/?sh=3c38d2866676) -* [ZDNet article](https://www.zdnet.com/article/nvidia-dell-qualcomm-speed-up-ai-results-in-latest-benchmark-tests) -* [LinkedIn article from Grigori Fursin (MLCommons Task Force co-chair)]( https://www.linkedin.com/pulse/announcing-my-new-project-reproducible-optimization-co-design-fursin ) -* [Linkedin article from Arjun Suresh (MLCommons Task Force co-chair)](https://www.linkedin.com/posts/arjunsuresh_nvidia-performance-trounces-all-competitors-activity-7049500972275929088-nnnx?utm_source=share&utm_medium=member_desktop) diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json deleted file mode 100644 index 0baf3cfeea..0000000000 --- a/challenge/optimize-mlperf-inference-v3.0-2023/_cm.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.0-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230301", - "date_open": "20230201", - "experiments": [ - { - "tags": "mlperf-inference,v3.0" - } - ], - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "mlperf", - "mlperf-inference", - "mlperf-inference-v3.0", - "mlperf-inference-v3.0-2023", - "v3.0" - ], - "title": "Run and optimize MLPerf inference v3.0 benchmarks", - "uid": "57cbc3384d7640f9" -} diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md deleted file mode 100644 index f6a17979ca..0000000000 --- a/challenge/optimize-mlperf-inference-v3.0-2023/docs/crowd-benchmark-mlperf-bert-inference-cuda.md +++ /dev/null @@ -1,281 +0,0 @@ -# Crowd-benchmarking MLPerf BERT 
inference - -
-Click here to see the table of contents. - -* [Crowd-benchmarking MLPerf BERT inference](#crowd-benchmarking-mlperf-bert-inference) -* [System preparation](#system-preparation) - * [Minimal system requirements](#minimal-system-requirements) - * [Install CM (CK2) automation meta-framework](#install-cm-ck2-automation-meta-framework) - * [Pull CM repository with portable automation recipes](#pull-cm-repository-with-portable-automation-recipes) - * [Detect or install CUDA](#detect-or-install-cuda) - * [Test CUDA installation](#test-cuda-installation) - * [Install Python virtual environment](#install-python-virtual-environment) - * [Detect or install cuDNN](#detect-or-install-cudnn) - * [Detect or install TensorRT](#detect-or-install-tensorrt) - * [Run MLPerf inference benchmark with BERT](#run-mlperf-inference-benchmark-with-bert) - * [Try ONNX runtime backend](#try-onnx-runtime-backend) - * [Do a test run to detect and record the system performance](#do-a-test-run-to-detect-and-record-the-system-performance) - * [Do a full accuracy run for all the scenarios](#do-a-full-accuracy-run-for-all-the-scenarios) - * [Do a full performance run for all the scenarios](#do-a-full-performance-run-for-all-the-scenarios) - * [Populate the README files](#populate-the-readme-files) - * [Generate MLPerf submission tree](#generate-mlperf-submission-tree) - * [Push the results to GitHub repo](#push-the-results-to-github-repo) - * [Try PyTorch backend](#try-pytorch-backend) - * [Test composable ML benchmark with other models, data sets, frameworks and platforms](#test-composable-ml-benchmark-with-other-models-data-sets-frameworks-and-platforms) -* [The next steps](#the-next-steps) - -
- - -This is a pilot community project to collaboratively run MLPerf BERT inference benchmark -across diverse platforms provided by volunteers similar to [SETI@home](https://setiathome.berkeley.edu/). -However, instead of searching for extraterrestrial intelligence, we are -searching for optimal software/hardware combination to run various AI and ML workloads -in terms of performance, accuracy, power and costs ... - -This benchmark is composed from [portable and reusable automation recipes](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) -developed by [MLCommons taskforce on automation and reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md) -to modularize complex AI and ML Systems and automate their benchmarking, design space exploration, optimization and deployment -across continuously evolving software, hardware, models and data. - -*If you submit your results before 1pm PST on Friday 3rd, 2023, - they will be accepted for the official MLPerf inference v3.0 submission round - and your name acknowledged in the notes!* - - -# System preparation - -## Minimal system requirements - -* CPU: any x86-64 or Arm64 based machine -* GPU: any relatively modern Nvidia GPU with 8GB+ memory and CUDA 11.4+ -* OS: we have tested this automation on Ubuntu 20.04, Ubuntu 22.04 and Debian 10 -* Disk space: ~10GB -* Python: 3.8+ -* All other dependencies (artifacts and tools) will be installed by the CM meta-framework aka (CK2) - -## Install CM (CK2) automation meta-framework - -Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install the [MLCommons CM framework](https://github.com/mlcommons/ck) -(the 2nd generation on the Collective Mind framework) on your system. 
- -## Pull CM repository with portable automation recipes - -Pull MLCommons CM repository with [cross-platform CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) -supporting portable MLOps and DevOps: - -```bash -cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 -``` - -CM pulls all such repositories into the `$HOME/CM` directory to search for portable CM automation recipes and artifacts. - -We use the unified CM CLI & Python API of [portable and reusable CM scripts](https://github.com/mlcommons/ck/blob/master/docs/list_of_scripts.md) -to compose portable automation pipelines (also implemented as CM scripts) that can automatically detect or install all necessary artifacts (tools, models, datasets, libraries, etc.) -required to run a given software project such as the MLPerf inference benchmark. - -These CM scripts simply wrap existing native scripts and tools as simple micro-services -with a human-readable CLI and simple Python API to be able to easily connect them together -and run on any platform in a unified way. - -## Detect or install CUDA - -Run the following CM script: -```bash -cm run script "get cuda" --out=json -``` - -If CUDA is automatically detected, it will be registered in the CM cache: -```bash -cm show cache --tags=get,cuda -``` - -Otherwise, this script will attempt to download and install the latest CUDA -from the Nvidia website. - -Please report any issue with CM scripts [here](https://github.com/mlcommons/ck/issues). - -### Test CUDA installation - -You can test if the CUDA toolkit and driver were detected or installed successfully using the following command: -```bash -cm run script "get cuda-devices" -``` - -You should see similar output: -```txt -Checking compiler version ... 
- -nvcc: NVIDIA (R) Cuda compiler driver -Copyright (c) 2005-2022 NVIDIA Corporation -Built on Wed_Sep_21_10:33:58_PDT_2022 -Cuda compilation tools, release 11.8, V11.8.89 -Build cuda_11.8.r11.8/compiler.31833905_0 - -Compiling program ... - -Running program ... - - - Running postprocess ... -GPU Device ID: 0 -GPU Name: Tesla K80 -GPU compute capability: 3.7 -CUDA driver version: 11.4 -CUDA runtime version: 11.8 -Global memory: 11997020160 -Max clock rate: 823.500000 MHz -Total amount of shared memory per block: 49152 -Total number of registers available per block: 65536 -Warp size: 32 -Maximum number of threads per multiprocessor: 2048 -Maximum number of threads per block: 1024 -Max dimension size of a thread block X: 1024 -Max dimension size of a thread block Y: 1024 -Max dimension size of a thread block Z: 64 -Max dimension size of a grid size X: 2147483647 -Max dimension size of a grid size Y: 65535 -Max dimension size of a grid size Z: 65535 - - - running time of script "get,cuda-devices": 4.16 sec. - -``` - -## Install Python virtual environment - -```bash -cm run script "get sys-utils-cm" --quiet - -cm run script "install python-venv" --name=mlperf-cuda -``` - -If you want to install specific version of Python use the following command: -```bash -cm run script "install python-venv" --version=3.10.8 --name=mlperf-cuda -``` - -## Detect or install cuDNN - -```bash -cm run script "get cudnn" -``` - -If cuDNN is not detected on your system, you can download a TAR file -from [Nvidia website](https://developer.nvidia.com/cudnn) and then use the same CM script -to install it as follows: -```bash -cm run script "get cudnn" --tar_file= -``` - -We have tested this project with the following tar file `cudnn-linux-x86_64-8.7.0.84_cuda11-archive.tar.xz`. 
- -## Detect or install TensorRT - -```bash -cm run script "get tensorrt" -``` -If TensorRT is not detected on your system, you can download a TAR file -from [Nvidia website](https://developer.nvidia.com/tensorrt) and then use the same CM script -to install it as follows: -```bash -cm run script "get tensorrt" --tar_file= -``` - -We have tested this project with the following tar file `TensorRT-8.5.1.7.Linux.x86_64-gnu.cuda-11.8.cudnn8.6.tar.gz`. - - -## Run MLPerf inference benchmark with BERT - -### Try ONNX runtime backend - -#### Do a test run to detect and record the system performance - -```bash -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ - --adr.python.name=mlperf-cuda --model=bert-99 --implementation=reference \ - --device=cuda --backend=onnxruntime --quiet -``` - -#### Do a full accuracy run for all the scenarios - -```bash -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ - --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ - --implementation=reference --backend=onnxruntime --quiet \ - --execution-mode=valid --results_dir=$HOME/inference_3.0_results -``` - -#### Do a full performance run for all the scenarios - -```bash -cm run script --tags=generate-run-cmds,inference,_performance-only,_all-scenarios \ - --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ - --implementation=reference --backend=onnxruntime --quiet \ - --execution-mode=valid --results_dir=$HOME/inference_3.0_results -``` - -#### Populate the README files - -```bash -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ - --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ - --implementation=reference --backend=onnxruntime --quiet \ - --execution-mode=valid --results_dir=$HOME/inference_3.0_results -``` - -#### Generate MLPerf submission tree - -We should use the master branch of MLCommons inference repo for the submission checker. 
-You can use the `--hw_notes_extra` option to add your name to the notes. - -```bash -cm run script --tags=generate,inference,submission \ - --results_dir=$HOME/inference_3.0_results/valid_results \ - --adr.python.name=mlperf-cuda \ - --device=cuda --submission_dir=$HOME/inference_submission_tree --clean \ - --run-checker --submitter=cTuning --adr.inference-src.version=master \ - --hw_notes_extra="Result taken by NAME" --quiet -``` - -#### Push the results to GitHub repo - -First create a fork of [this GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0). -Then run the following command after replacing `--repo_url` with your fork URL. - -```bash -cm run script --tags=push,github,mlperf,inference,submission \ - --submission_dir=$HOME/inference_submission_tree \ - --adr.python.name=mlperf-cuda \ - --repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.0 \ - --commit_message="Bert crowd-results added" -``` - -Create a PR to the [GitHub repo with aggregated results](https://github.com/ctuning/mlperf_inference_submissions_v3.0/) - - - -### Try PyTorch backend - -You can run the same commands with PyTorch by rerunning all of the above commands and replacing `--backend=onnxruntime` with `--backend=pytorch`. 
- -For example, - -```bash -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ - --adr.python.name=mlperf-cuda --model=bert-99 --device=cuda \ - --implementation=reference --backend=pytorch --execution-mode=valid \ - --results_dir=$HOME/inference_3.0_results --quiet -``` - - -## Test composable ML benchmark with other models, data sets, frameworks and platforms - -* [GUI to prepare CM command line and run benchmark](https://cknowledge.org/mlperf-inference-gui) -* [GUI to compare performance, accuracy, power and costs of ML/SW/HW combinations](https://cKnowledge.org/cm-gui-graph) - - -# The next steps - -Please follow the [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org) -and [MLCommons](https://mlcommons.org). - diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md deleted file mode 100644 index 38f69a5d53..0000000000 --- a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-3d-unet-submission.md +++ /dev/null @@ -1,59 +0,0 @@ -## Setup -Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. -Download the ck repo to get the CM script for MLPerf submission - -``` -cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 -``` - -## Run Commands - -3d-unet has two variants - `3d-unet-99` and `3d-unet-99.9` where the `99` and `99.9` specify the required accuracy constraint with respect to the reference floating point model. Both models can be submitted under edge as well as datacenter category. - -Since 3d-unet is one of the slowest running models, we are only running it using nvidia-implementation where the model is quantized and run on TensorRT backend on Nvidia GPU. - -For `3d-unet-99.9` runs, simply replace `3d-unet-99` with `3d-unet-99.9`. 
- -### TensorRT backend - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ ---model=3d-unet-99 --implementation=nvidia-original --device=cuda --backend=tensorrt \ ---category=edge --division=open --quiet -``` -* Use `--category=datacenter` to run datacenter scenarios -* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) - -#### Do a full accuracy and performance runs for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` - -* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ ---model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` - -#### Generate actual submission tree - -Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). 
After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. - -We should use the master branch of MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes. -``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ ---device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \ ---adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet -``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md deleted file mode 100644 index 8aebb068f0..0000000000 --- a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-bert-submission.md +++ /dev/null @@ -1,80 +0,0 @@ -## Setup -Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. -Download the ck repo to get the CM script for MLPerf submission - -``` -cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 -``` - -## Run Commands - -Bert has two variants - `bert-99` and `bert-99.9` where the `99` and `99.9` specify the required accuracy constraint with respect to the reference floating point model. `bert-99.9` model is applicable only on a datacenter system. - -On edge category `bert-99` has Offline and SingleStream scenarios and in datacenter category both `bert-99` and `bert-99.9` have Offline and Server scenarios. The below commands are assuming an edge category system. 
- -### Onnxruntime backend - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ ---model=bert-99 --implementation=reference --device=cpu --backend=onnxruntime \ ---category=edge --division=open --quiet -``` -* Use `--device=cuda` to run the inference on Nvidia GPU -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* Use `--category=datacenter` to run datacenter scenarios - -#### Do a full accuracy and performance runs for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` - -* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` - -#### Generate actual submission tree - -Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). 
After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. - -We should use the master branch of MLCommons inference repo for the submission checker. You can use the `--hw_notes_extra` option to add your name to the notes. -``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ ---device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \ ---adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet -``` - - -## Tensorflow backend - -Same commands as for `onnxruntime` should work by replacing `--backend=onnxruntime` with `--backend=tf`. For example, - -``` -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=tf --execution-mode=valid \ ---results_dir=$HOME/inference_3.0_results --quiet -``` - -## Pytorch backend - -Same commands as for `onnxruntime` should work by replacing `--backend=onnxruntime` with `--backend=pytorch`. 
For example, - -``` -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=pytorch \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results --quiet -``` - diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md deleted file mode 100644 index 6d6ba275fd..0000000000 --- a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-resnet50-submission.md +++ /dev/null @@ -1,82 +0,0 @@ -## Setup -Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. -Download the ck repo to get the CM script for MLPerf submission - -``` -cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 -``` - -## Run Commands - -We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. - -On edge category ResNet50 has Offline, SingleStream and MultiStream scenarios and in datacenter category it has Offline and Server scenarios. The below commands are assuming an edge category system. 
- -### Onnxruntime backend - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ ---model=resnet50 --implementation=reference --device=cpu --backend=onnxruntime \ ---category=edge --division=open --quiet -``` -* Use `--device=cuda` to run the inference on Nvidia GPU -* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) -* Use `--category=datacenter` to run datacenter scenarios - -#### Do a full accuracy and performance runs for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios --model=resnet50 \ ---device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` - -* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ ---model=resnet50 --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` - -#### Generate actual submission tree - -Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). 
After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. - -We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. -``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ ---submission_dir=$HOME/inference_submission_tree --clean \ ---run-checker --submitter=cTuning --adr.inference-src.version=master \ ---hw_notes_extra="Result taken by NAME" --quiet -``` - - -## Tensorflow backend - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=resnet50 --device=cpu --implementation=reference --backend=tf \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` - -## TVM backend - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tvm-onnx`. 
(Only `--device=cpu` is currently supported for TVM) For example, - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=resnet50 --device=cpu --implementation=reference --backend=tvm-onnx \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md deleted file mode 100644 index 4eedba9f31..0000000000 --- a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-retinanet-submission.md +++ /dev/null @@ -1,67 +0,0 @@ -## Setup -Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. -Download the ck repo to get the CM script for MLPerf submission - -``` -cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 -``` - -## Run Commands - - -### Onnxruntime backend - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ ---model=retinanet --implementation=reference --device=cpu --backend=onnxruntime \ ---category=edge --division=open --quiet -``` -* Use `--device=cuda` to run the inference on Nvidia GPU -* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) -* Use `--category=datacenter` to run datacenter scenarios - -#### Do a full accuracy and performance runs for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` - -* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ ---model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` - -#### Generate actual submission tree - -Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. - -We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. -``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ ---device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning ---adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet -``` - - -## Pytorch backend - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. 
For example, - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=retinanet --device=cpu --implementation=reference --backend=pytorch \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md deleted file mode 100644 index d7191c808d..0000000000 --- a/challenge/optimize-mlperf-inference-v3.0-2023/docs/generate-rnnt-submission.md +++ /dev/null @@ -1,53 +0,0 @@ -## Setup -Please follow the MLCommons CK [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) to install CM. -Download the ck repo to get the CM script for MLPerf submission - -``` -cm pull repo mlcommons@ck --checkout=681547519f4d9a8991d992d1300c90cfde06e9b9 -``` - -## Run Commands - -### TensorRT backend - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ ---model=rnnt --implementation=nvidia-original --device=cuda --backend=tensorrt \ ---category=edge --division=open --quiet -``` -* Use `--category=datacenter` to run datacenter scenarios -* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) - -#### Do a full accuracy and performance runs for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` - -* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ ---model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ ---execution-mode=valid --results_dir=$HOME/inference_3.0_results \ ---category=edge --division=open --quiet -``` - -#### Generate actual submission tree - -Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. - -We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. 
-``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.0_results/valid_results \ ---device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \ ---adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet -``` diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md deleted file mode 100644 index e1691c21ac..0000000000 --- a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-aws-instance.md +++ /dev/null @@ -1,48 +0,0 @@ -The below instructions are for creating an AWS instance from the CLI. You can also create an instance via the web and set up CM on it. - -## Prerequisites - -1. AWS Key, secret and token -2. `*.pem` ssh key file to be used to create the instance (public key from here will be copied to the `$HOME/.ssh/authorized_keys` file in the created instance) - -## Run Commands - -We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. - -### Update Access Details - -``` -cd $HOME/CM/repos/mlcommons@ck/cm-mlops/script/run-terraform/aws/ -cp credentials.example credentials.sh -``` -Update `credentials.sh` with your AWS Key, Secret and Token - -### Create an AWS Instance - - -``` -cm run script --tags=run,terraform,_m7g.xlarge,_storage_size.500,_ubuntu.2204,_us-west-2 \ ---cminit --key_file=$HOME/cmuser.pem -``` - -The above command will output the IP of the created instance, which will already have CM set up. - -`_m7g.xlarge,_storage_size.500,_ubuntu.2204` variations can be changed to launch a different instance. 
Below are the variation combinations we used for MLPerf inference 3.0 submissions. - -* `_g4dn.xlarge` -* `_a1.2xlarge,_storage_size.130,_ubuntu.2204` -* `_c5.4xlarge,_storage_size.130,_ubuntu.2204` -* `_m7g.2xlarge,_storage_size.500,_ubuntu.2204` -* `_inf1.2xlarge,_storage_size.500,_amazon-linux-2-kernel.510` -* `_t2.medium,_storage_size.200,_rhel.9` - -### Copy the needed files from the local machine - -Copy the imagenet dataset to the created instance. For example, - -``` -rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134: -``` -For using [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia) tar files for cuDNN and TensorRT are needed to be downloaded locally from Nvidia website and copied to the AWS instance similar to the above command. - -Once all the required files are copied over, login to the instance and follow the individual benchmark instructions from the README files given [here](./) diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md deleted file mode 100644 index 6bd16556a3..0000000000 --- a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-gcp-instance.md +++ /dev/null @@ -1,35 +0,0 @@ -The below instructions are for creating a Google Cloud instance from the CLI. You can also create an instance via web and setup CM on it. - -## Prerequisites - -Please follow the authentication instructions given [here](https://github.com/ctuning/mlcommons-ck/blob/master/cm-mlops/script/run-terraform/README-about.md). - - -## Run Commands - -We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. 
Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. - - -### Create a GCP Instance - - -``` -cm run script --tags=run,terraform,_gcp,_n1-highmem.4,_gcp_project.mlperf-inference-tests --cminit -``` - -The above command will output the IP of the created instance, which will already have CM set up. - -`_n1-highmem.4` variation can be changed to launch a different instance. Below are the variation combinations we used for MLPerf inference 3.0 submissions. - -* `_n1-standard.4` - -### Copy the needed files - -Copy the imagenet dataset to the created instance. For example, - -``` -rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134: -``` -To use the [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia), tar files for cuDNN and TensorRT need to be downloaded locally from the Nvidia website and copied to the GCP instance similar to the above command. - -Once all the required files are copied over, log in to the instance and follow the individual benchmark instructions from the README files given [here](./) diff --git a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md b/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md deleted file mode 100644 index 68db00ea0e..0000000000 --- a/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md +++ /dev/null @@ -1,53 +0,0 @@ -## Setup -We used Nvidia Jetson AGX Orin developer kit with 32GB RAM and 64GB eMMC. We also connected a 500GB SSD disk via USB and Wifi connection for internet connectivity. - -We used the out of the box developer kit image which was running Ubuntu 20.04 and JetPack 5.0.1 Developer Preview (L4T 34.1.1) with CUDA 11.4. 
We were also using the default 4k page size (Nvidia recommends 64k for MLPerf inference). - -[cuDNN 8.6.0](https://developer.nvidia.com/compute/cudnn/secure/8.6.0/local_installers/11.8/cudnn-local-repo-ubuntu2004-8.6.0.163_1.0-1_arm64.deb) and [TensorRT 8.5.2.2](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.5.3/local_repos/nv-tensorrt-local-repo-ubuntu2004-8.5.3-cuda-11.8_1.0-1_arm64.deb) were downloaded as Debian packages on a host machine, copied over to Nvidia Jetson Orin and installed. - - -We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset. - -### Copy the needed files from a host machine - -Copy the imagenet dataset to the created instance. For example, - -``` -rsync -avz $HOME/imagenet-2012-val/ user@192.168.0.27: -``` - -Login to Orin and register the imagenet dataset as -``` -cm run script --tags=get,imagenet,dataset,_2012,_full --input=$HOME/imagenet-2012-val -``` - -Once all the required files are copied over, follow the individual benchmark instructions from the README files given [here](./) All the required dependencies should be resolved by CM. - -### Power Measurement Setup - -We were measuring power in the peak performance mode (MaxN) except for one SUT where the energy efficiency mode was changed to Max15. Our aim was to showcase the out of the box performance of Nvidia Jetson AGX Orin including the power usage. - -## Reproducing the Nvidia Jetson AGX Orin Submission - -After our submission we followed the instructions from Nvidia in the inference v3.0 repository and tried to reproduce the numbers from Nvidia. For MaxN mode we were able to match the numbers by Nvidia using same versions of CUDA, cuDNN and TensorRT but outside of docker. 
For MaxQ mode, we could get the same performance as Nvidia but our power usage was about 5W higher. - -### Performance results MaxN - -The below table shows the performance comparison of our results under different settings and the Nvidia submission for MLPerf inference 3.0. We'll be updating our instructions for easier reproducibility of these numbers including CM scripts for flashing the L4T image and rebuilding the kernel for 64k pagesize. - - -| Workload | Results | L4T | PAGESIZE | Power Mode | FAN Dynamic Speed control | Offline Accuracy | Offline Performance | SingleStream Accuracy | SingleStream Performance | MultiStream Accuracy | MultiStream Performance | -| --------- | --------------------------------- | ----- | -------- | ---------- | ------------------------- | ---------------- | ------------------- | --------------------- | ------------------------ | -------------------- | ----------------------- | -| ResNet50 | Nvidia Submitted (docker) | r35.3 | 64k | MaxN | active | 75.934 | 6438.1 | 76.032 | 0.633479 | 76.032 | 2.187731 | -| ResNet50 | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 75.934 | 4697 | 76.032 | 0.72 | 76.032 | 2.57 | -| ResNet50 | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 75.85 | 6172 | 76.056 | 0.644 | 76.056 | 2.074 | -| ResNet50 | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 75.85 | 6430 | 76.056 | 0.659 | 76.056 | 2.20 | -| RetinaNet | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 37.372 | 92.4048 | 37.403 | 13.924457 | 37.519 | 104.680313 | -| RetinaNet | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 37.346 | 80.0854 (no DLA) | 37.350 | 14,19 | 37.409 | 105.344828 | -| RetinaNet | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 37.345 | 94.6886 | 37.340 | 14.073 | 37.488 | 103.8 | -| BERT | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 90.552 | 544.243 | 90.344 | 5.635431 | NA | NA | -| BERT | cTuning 
Submitted | r34.1.1 | 4k | MaxN | active | 90.552 | 449.96 | 90.344 | 7.8 | NA | NA | -| BERT | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 90.562 | 527 (128 batchsize) | 90.311 | 6.636 | NA | NA | -| BERT | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 90.552 | 539 | 90.344 | 6.31 | NA | NA | - - diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-2023/README.md deleted file mode 100644 index 6362f3eb66..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-2023/README.md +++ /dev/null @@ -1,83 +0,0 @@ -### Introduction - -Our goal is to help the community benchmark and optimize various AI/ML applications -across diverse software and hardware provided by volunteers similar to SETI@home! - -Open-source [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) -were developed by a [consortium of 50+ companies and universities (MLCommons)](https://mlcommons.org) -to enable trustable and reproducible comparison of AI/ML systems -in terms of latency, throughput, power consumption, accuracy and other metrics -across diverse software/hardware stacks from different vendors. - -However, running MLPerf inference benchmarks and submitting results [turned out to be a challenge](https://doi.org/10.5281/zenodo.8144274) -even for experts and could easily take many weeks to prepare. 
That's why [MLCommons](https://mlcommons.org), -[cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -and [cKnowledge.org](https://www.linkedin.com/company/cknowledge) -decided to develop an open-source, technology-agnostic -and non-intrusive [Collective Mind automation language (CM)](https://github.com/mlcommons/ck) -and [Collective Knowledge Playground (CK)](https://access.cknowledge.org/playground/?action=experiments) -to help anyone run, reproduce, optimize and compare MLPerf inference benchmarks out-of-the-box -across diverse software, hardware, models and data sets. - -You can read more about our vision, open-source technology and future plans -in this [presentation](https://doi.org/10.5281/zenodo.8105339). - - - -### Advanced challenge - -We would like to ask volunteers to run various MLPerf inference benchmarks -on diverse CPUs (Intel, AMD, Arm) and Nvidia GPUs similar to SETI@home -across different frameworks (ONNX, PyTorch, TF, TFLite) -either natively or in a cloud (AWS, Azure, GCP, Alibaba, Oracle, OVHcloud, ...) -and submit results to MLPerf inference v3.1. 
- -However, since some benchmarks may take 1..2 days to run, we suggest to start in the following order (these links describe CM commands to run benchmarks and submit results): -* [CPU: Reference implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_reference.md) -* [CPU: TFLite C++ implementation of Image classification with variations of MobileNets and EfficientNets (open division)](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/run-mlperf-inference-mobilenet-models/README-about.md) -* [Nvidia GPU: Nvidia optimized implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_nvidia.md) -* [Nvidia GPU: Nvidia optimized implementation of Language processing with BERT large (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/bert/README_nvidia.md) -* [Nvidia GPU: Reference implementation of Image Classification with ResNet50 (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/bert/README_nvidia.md) -* [Nvidia GPU: Reference implementation of Language processing with BERT large (open and then closed division)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/resnet50/README_reference.md) -* [Nvidia GPU (24GB of memory min): Reference implementation of Language processing with GPT-J 6B (open)](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/gpt-j/README_reference.md) -* [Nvidia GPU: Nvidia optimized implementation of all other models (open and closed division)](https://github.com/ctuning/mlcommons-ck/blob/master/docs/mlperf/inference/README.md#run-benchmarks-and-submit-results) - -Please read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to set up and run 
above benchmarks using CM. - -You can register your participation for the [Collective Knowledge leaderboard]( https://access.cKnowledge.org/playground/?action=contributors ) -using this [guide](https://github.com/mlcommons/ck/blob/master/platform/register.md). - -Please report encountered problems using [GitHub issues](https://github.com/mlcommons/ck/issues) -to help the community -improve the portability of the CM automation for MLPerf and other benchmarks and projects. - -Looking forward to your submissions and happy hacking! - - - -### Prizes - -* *All submitters will receive 1 point for submitting valid results for 1 complete benchmark on one system.* -* *All submitters will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. - - -### Organizers - -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - - -### Status - -You can see shared results in [this repository](https://github.com/ctuning/mlperf_inference_submissions_v3.1) -with PRs from participants [here](https://github.com/ctuning/mlperf_inference_submissions_v3.1/pulls). - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at the official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json deleted file mode 100644 index a30c26c928..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-2023/_cm.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230817", - "date_open": "20230704", - "experiments": [], - "points": 1, - "sort": -10, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "benchmark", - "automate", - "mlperf", - "mlperf-inference", - "mlperf-inference-v3.1", - "mlperf-inference-v3.1-2023", - "v3.1" - ], - "title": "Crowd-benchmark all MLPerf inference benchmarks similar to SETI@home (latency, throughput, power consumption, accuracy, costs)", - "trophies": true, - "uid": "3e971d8089014d1f" -} diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md deleted file mode 100644 index 9806c22647..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-3d-unet-submission.md +++ /dev/null @@ -1,67 +0,0 @@ -## Setup - -Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) -to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. - -Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box -across different software, hardware, models and data sets: - - -``` -cm pull repo mlcommons@ck -``` - -Note that you can install Python virtual environment via CM to avoid contaminating -your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). 
- -## Run Commands - -3d-unet has two variants - `3d-unet-99` and `3d-unet-99.9` where the `99` and `99.9` specify the required accuracy constraint with respect to the reference floating point model. Both models can be submitted under edge as well as datacenter category. - -Since 3d-unet is one of the slowest-running models, we are only running it using nvidia-implementation where the model is quantized and run on TensorRT backend on Nvidia GPU. - -For `3d-unet-99.9` runs, simply replace `3d-unet-99` with `3d-unet-99.9`. - -### TensorRT backend - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ ---model=3d-unet-99 --implementation=nvidia-original --device=cuda --backend=tensorrt \ ---category=edge --division=open --quiet -``` -* Use `--category=datacenter` to run datacenter scenarios -* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) - -#### Do a full accuracy and performance runs for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` - -* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ ---model=3d-unet-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` - -#### Generate actual submission tree - -Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. - -We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_notes_extra` option to add your name to the notes. 
-``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ ---device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning ---adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet -``` diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md deleted file mode 100644 index c43363c1e9..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-bert-submission.md +++ /dev/null @@ -1,113 +0,0 @@ -## Setup - -Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) -to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. - -Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box -across different software, hardware, models and data sets: - - -``` -cm pull repo mlcommons@ck -``` - -Note that you can install Python virtual environment via CM to avoid contaminating -your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). - -## Run Commands - -Bert has two variants - `bert-99` and `bert-99.9` where the `99` and `99.9` specifies the required accuracy constraint with respect to the reference floating point model. `bert-99.9` model is applicable only on a datacenter system. - -On edge category `bert-99` has Offline and SingleStream scenarios and in datacenter category both `bert-99` and `bert-99.9` have Offline and Server scenarios. The below commands are assuming an edge category system. 
- -### Onnxruntime backend (Reference implementation) - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ ---model=bert-99 --implementation=reference --device=cpu --backend=onnxruntime \ ---category=edge --division=open --quiet -``` -* Use `--device=cuda` to run the inference on Nvidia GPU -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* Use `--category=datacenter` to run datacenter scenarios - -#### Do a full accuracy and performance runs for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` - -* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs. This requires a power analyzer as described [here](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md) -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` - -#### Generate actual submission tree - -Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons 
Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. - -We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. -``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ ---device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning ---adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet -``` - - -## Tensorflow backend (Reference implementation) - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. For example, - -``` -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=tf --execution-mode=valid \ ---results_dir=$HOME/inference_3.1_results --quiet -``` - -## Pytorch backend (Reference implementation) - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. 
For example, - -``` -cm run script --tags=generate-run-cmds,inference,_accuracy-only,_all-scenarios \ ---model=bert-99 --device=cpu --implementation=reference --backend=pytorch \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results --quiet -``` - -## TensorRT backend (Nvidia implementation) - -For TensorRT backend we are using the [Nvidia implementation](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/reproduce-mlperf-inference-nvidia) and not the [MLPerf inference reference implementation](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-inference-reference) for the below reasons -* TensorRT backend is not supported by default in the reference implementation -* Reference implementation is mostly for fp32 models and quantization is not supported by default -* Nvidia has done some fantastic work in optimizing performance for TensorRT backend - -To get set up, please follow the instructions [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/reproduce-mlperf-inference-nvidia/README-about.md) to download and install TensorRT and cuDNN unless you already have them installed. This readme also details how to handle the configuration files which are automatically generated by the Nvidia implementation scripts. Once this is done, the following command will run all the modes and scenarios. - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=bert-99 --device=cuda --implementation=nvidia-original --backend=tensorrt \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results --quiet -``` - -* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs. 
This requires a power analyzer as described [here](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md) -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the default performance numbers -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* Use `--category=datacenter` to run datacenter scenarios - - -TensorRT backend has an engine generation stage which can be time consuming. For repeated runs `--adr.nvidia-harness.make_cmd=run_harness` option will avoid this engine regeneration and reuse the previously generated one. - - diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md deleted file mode 100644 index 470930e373..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-resnet50-submission.md +++ /dev/null @@ -1,90 +0,0 @@ -## Setup - -Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) -to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. - -Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box -across different software, hardware, models and data sets: - - -``` -cm pull repo mlcommons@ck -``` - -Note that you can install Python virtual environment via CM to avoid contaminating -your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). - -## Run Commands - -We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. 
Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. - -On edge category ResNet50 has Offline, SingleStream and MultiStream scenarios and in datacenter category it has Offline and Server scenarios. The below commands are assuming an edge category system. - -### Onnxruntime backend - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ ---model=resnet50 --implementation=reference --device=cpu --backend=onnxruntime \ ---category=edge --division=open --quiet -``` -* Use `--device=cuda` to run the inference on Nvidia GPU -* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) -* Use `--category=datacenter` to run datacenter scenarios - -#### Do a full accuracy and performance runs for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios --model=resnet50 \ ---device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` - -* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ ---model=resnet50 --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` - -#### Generate actual submission tree - -Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. - -We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. -``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ ---submission_dir=$HOME/inference_submission_tree --clean \ ---run-checker --submitter=cTuning --adr.inference-src.version=master \ ---hw_notes_extra="Result taken by NAME" --quiet -``` - - -## Tensorflow backend - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tf`. 
For example, - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=resnet50 --device=cpu --implementation=reference --backend=tf \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` - -## TVM backend - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=tvm-onnx`. (Only `--device=cpu` is currently supported for TVM) For example, - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=resnet50 --device=cpu --implementation=reference --backend=tvm-onnx \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md deleted file mode 100644 index 4420462cde..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-retinanet-submission.md +++ /dev/null @@ -1,75 +0,0 @@ -## Setup - -Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) -to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. - -Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box -across different software, hardware, models and data sets: - - -``` -cm pull repo mlcommons@ck -``` - -Note that you can install Python virtual environment via CM to avoid contaminating -your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). 
- -## Run Commands - - -### Onnxruntime backend - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ ---model=retinanet --implementation=reference --device=cpu --backend=onnxruntime \ ---category=edge --division=open --quiet -``` -* Use `--device=cuda` to run the inference on Nvidia GPU -* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) -* Use `--category=datacenter` to run datacenter scenarios - -#### Do a full accuracy and performance runs for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` - -* Use `--power=yes` for measuring power. It is ignored for accuracy and compliance runs -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* `--offline_target_qps`, `--server_target_qps`, `--singlestream_target_latency` and `multistream_target_latency` can be used to override the determined performance numbers - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ ---model=retinanet --device=cpu --implementation=reference --backend=onnxruntime \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` - -#### Generate actual submission tree - -Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). 
After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. - -We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_note_extra` option to add your name to the notes. -``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ ---device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning ---adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet -``` - - -## Pytorch backend - -Same commands as for `onnxruntime` should work by replacing `backend=onnxruntime` with `--backend=pytorch`. For example, - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=retinanet --device=cpu --implementation=reference --backend=pytorch \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md deleted file mode 100644 index a6ca069215..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-2023/docs/generate-rnnt-submission.md +++ /dev/null @@ -1,61 +0,0 @@ -## Setup - -Please follow this [installation guide](https://github.com/mlcommons/ck/blob/master/docs/installation.md) -to install the MLCommons CM reproducibility and automation language in your native environment or Docker container. 
- -Then install the repository with CM automation scripts to run MLPerf benchmarks out-of-the-box -across different software, hardware, models and data sets: - - -``` -cm pull repo mlcommons@ck -``` - -Note that you can install Python virtual environment via CM to avoid contaminating -your local Python installation as described [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/automation/script/README-extra.md#using-python-virtual-environments). - -## Run Commands - -### TensorRT backend - -#### Do a test run to detect and record the system performance - -``` -cm run script --tags=generate-run-cmds,inference,_find-performance,_all-scenarios \ ---model=rnnt --implementation=nvidia-original --device=cuda --backend=tensorrt \ ---category=edge --division=open --quiet -``` -* Use `--category=datacenter` to run datacenter scenarios -* Use `--division=closed` to run all scenarios for the closed division (compliance tests are skipped for `_find-performance` mode) - -#### Do a full accuracy and performance runs for all the scenarios - -``` -cm run script --tags=generate-run-cmds,inference,_all-modes,_all-scenarios \ ---model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` - -* Use `--power=yes` for measuring power. 
It is ignored for accuracy and compliance runs -* Use `--division=closed` to run all scenarios for the closed division including the compliance tests -* `--offline_target_qps`, `--server_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers - -#### Populate the README files -``` -cm run script --tags=generate-run-cmds,inference,_populate-readme,_all-scenarios \ ---model=rnnt --device=cuda --implementation=nvidia-original --backend=tensorrt \ ---execution-mode=valid --results_dir=$HOME/inference_3.1_results \ ---category=edge --division=open --quiet -``` - -#### Generate actual submission tree - -Here, we are copying the performance and accuracy log files (compliance logs also in the case of closed division) from the results directory to the submission tree following the [directory structure required by MLCommons Inference](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). After the submission tree is generated, [accuracy truncate script](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/truncate-mlperf-inference-accuracy-log) is called to truncate accuracy logs and then the [submission checker](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/run-mlperf-inference-submission-checker) is called to validate the generated submission tree. - -We should use the master branch of MLCommons inference repo for the submission checker. You can use `--hw_notes_extra` option to add your name to the notes. 
-``` -cm run script --tags=generate,inference,submission --results_dir=$HOME/inference_3.1_results/valid_results \ ---device=cpu --submission_dir=$HOME/inference_submission_tree --clean --run-checker --submitter=cTuning \ ---adr.inference-src.version=master --hw_notes_extra="Result taken by NAME" --quiet -``` diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md deleted file mode 100644 index 152c612aad..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-aws-instance.md +++ /dev/null @@ -1,50 +0,0 @@ -## Setup AWS instance for MLPerf - -The below instructions are for creating an AWS instance from the CLI. You can also create an instance via web and setup CM on it. - -## Prerequisites - -1. AWS Key, secret and token -2. `*.pem` ssh key file to be used to create the instance (public key from here will be copied to the `$HOME/.ssh/authorized_keys` file in the created instance) - -## Run Commands - -We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. - -### Update Access Details - -``` -cd $HOME/CM/repos/mlcommons@ck/cm-mlops/script/run-terraform/aws/ -cp credentials.example credentials.sh -``` -Update `credentials.sh` with your AWS Key, Secret and Token - -### Create an AWS Instance - - -``` -cm run script --tags=run,terraform,_m7g.xlarge,_storage_size.500,_ubuntu.2204,_us-west-2 \ ---cminit --key_file=$HOME/cmuser.pem -``` - -The above command will output the IP of the created instance which will be having CM setup already done. - -`_m7g.xlarge,_storage_size.500,_ubuntu.2204` variations can be changed to launch a different instance. 
Below are the variation combinations we used for MLPerf inference 3.0 submissions. - -* `_g4dn.xlarge` -* `_a1.2xlarge,_storage_size.130,_ubuntu.2204` -* `_c5.4xlarge,_storage_size.130,_ubuntu.2204` -* `_m7g.2xlarge,_storage_size.500,_ubuntu.2204` -* `_inf1.2xlarge,_storage_size.500,_amazon-linux-2-kernel.510` -* `_t2.medium,_storage_size.200,_rhel.9` - -### Copy the needed files from the local machine - -Copy the imagenet dataset to the created instance. For example, - -``` -rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134: -``` -For using [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia) tar files for cuDNN and TensorRT are needed to be downloaded locally from Nvidia website and copied to the AWS instance similar to the above command. - -Once all the required files are copied over, login to the instance and follow the individual benchmark instructions from the README files given [here](./) diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md deleted file mode 100644 index a3a0e457a1..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-gcp-instance.md +++ /dev/null @@ -1,37 +0,0 @@ -## Setup GCP instance for MLPerf - -The below instructions are for creating a Google Cloud instance from the CLI. You can also create an instance via web and setup CM on it. - -## Prerequisites - -Please follow the authentication instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/run-terraform/README-about.md). - - -## Run Commands - -We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. 
Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset and register in CM. - - -### Create a GCP Instance - - -``` -cm run script --tags=run,terraform,_gcp,_n1-highmem.4,_gcp_project.mlperf-inference-tests --cminit -``` - -The above command will output the IP of the created instance which will be having CM setup already done. - -`_n1-highmem.4` variation can be changed to launch a different instance. Below are the variation combinations we used for MLPerf inference 3.0 submissions. - -* `_n1-standard.4` - -### Copy the needed files - -Copy the imagenet dataset to the created instance. For example, - -``` -rsync -avz -e 'ssh -i $HOME/cmuser.pem' $HOME/imagenet-2012-val/ ubuntu@54.189.93.134: -``` -For using [nvidia-original implementation](https://github.com/mlcommons/ck/tree/main/cm-mlops/script/reproduce-mlperf-inference-nvidia) tar files for cuDNN and TensorRT need to be downloaded locally from the Nvidia website and copied to the GCP instance similar to the above command. - -Once all the required files are copied over, login to the instance and follow the individual benchmark instructions from the README files given [here](./) diff --git a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md b/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md deleted file mode 100644 index 08c0a8eeb0..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-2023/docs/setup-nvidia-jetson-orin.md +++ /dev/null @@ -1,54 +0,0 @@ -## Setup - -We used Nvidia Jetson AGX Orin developer kit with 32GB RAM and 64GB eMMC. We also connected a 500GB SSD disk via USB and Wifi connection for internet connectivity. - -We used the out of the box developer kit image which was running Ubuntu 20.04 and JetPack 5.0.1 Developer Preview (L4T 34.1.1) with CUDA 11.4. 
We were also using the default 4k page size (Nvidia recommends 64k for MLPerf inference). - -[cuDNN 8.6.0](https://developer.nvidia.com/compute/cudnn/secure/8.6.0/local_installers/11.8/cudnn-local-repo-ubuntu2004-8.6.0.163_1.0-1_arm64.deb) and [TensorRT 8.5.2.2](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/secure/8.5.3/local_repos/nv-tensorrt-local-repo-ubuntu2004-8.5.3-cuda-11.8_1.0-1_arm64.deb) were downloaded as Debian packages on a host machine, copied over to Nvidia Jetson Orin and installed. - - -We need to get imagenet full dataset to make image-classification submissions for MLPerf inference. Since this dataset is not publicly available via a URL please follow the instructions given [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-dataset-imagenet-val/README-extra.md) to download the dataset. - -### Copy the needed files from a host machine - -Copy the imagenet dataset to the created instance. For example, - -``` -rsync -avz $HOME/imagenet-2012-val/ user@192.168.0.27: -``` - -Login to Orin and register the imagenet dataset as -``` -cm run script --tags=get,imagenet,dataset,_2012,_full --input=$HOME/imagenet-2012-val -``` - -Once all the required files are copied over, follow the individual benchmark instructions from the README files given [here](./) All the required dependencies should be resolved by CM. - -### Power Measurement Setup - -We were measuring power in the peak performance mode (MaxN) except for one SUT where the energy efficiency mode was changed to Max15. Our aim was to showcase the out of the box performance of Nvidia Jetson AGX Orin including the power usage. - -## Reproducing the Nvidia Jetson AGX Orin Submission - -After our submission we followed the instructions from Nvidia in the inference v3.0 repository and tried to reproduce the numbers from Nvidia. For MaxN mode we were able to match the numbers by Nvidia using same versions of CUDA, cuDNN and TensorRT but outside of docker. 
For MaxQ mode, we could get the same performance as Nvidia but our power usage was about 5W higher. - -### Performance results MaxN - -The below table shows the performance comparison of our results under different settings and the Nvidia submission for MLPerf inference 3.0. We'll be updating our instructions for easier reproducibility of these numbers including CM scripts for flashing the L4T image and rebuilding the kernel for 64k pagesize. - - -| Workload | Results | L4T | PAGESIZE | Power Mode | FAN Dynamic Speed control | Offline Accuracy | Offline Performance | SingleStream Accuracy | SingleStream Performance | MultiStream Accuracy | MultiStream Performance | -| --------- | --------------------------------- | ----- | -------- | ---------- | ------------------------- | ---------------- | ------------------- | --------------------- | ------------------------ | -------------------- | ----------------------- | -| ResNet50 | Nvidia Submitted (docker) | r35.3 | 64k | MaxN | active | 75.934 | 6438.1 | 76.032 | 0.633479 | 76.032 | 2.187731 | -| ResNet50 | cTuning Submitted | r34.1.1 | 4k | MaxN | active | 75.934 | 4697 | 76.032 | 0.72 | 76.032 | 2.57 | -| ResNet50 | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 75.85 | 6172 | 76.056 | 0.644 | 76.056 | 2.074 | -| ResNet50 | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 75.85 | 6430 | 76.056 | 0.659 | 76.056 | 2.20 | -| RetinaNet | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 37.372 | 92.4048 | 37.403 | 13.924457 | 37.519 | 104.680313 | -| RetinaNet | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 37.346 | 80.0854 (no DLA) | 37.350 | 14.19 | 37.409 | 105.344828 | -| RetinaNet | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 37.345 | 94.6886 | 37.340 | 14.073 | 37.488 | 103.8 | -| BERT | Nvidia Submitted (docker) | r35.3 | x | MaxN | active | 90.552 | 544.243 | 90.344 | 5.635431 | NA | NA | -| BERT | cTuning 
Submitted | r34.1.1 | 4k | MaxN | active | 90.552 | 449.96 | 90.344 | 7.8 | NA | NA | -| BERT | MLCommons taskforce on reproducibility | r35.2.1 | 4k | MaxN | active | 90.562 | 527 (128 batchsize) | 90.311 | 6.636 | NA | NA | -| BERT | MLCommons taskforce on reproducibility | r35.3 | 64k | MaxN | active | 90.552 | 539 | 90.344 | 6.31 | NA | NA | - - diff --git a/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md deleted file mode 100644 index b72349ad59..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/README.md +++ /dev/null @@ -1,31 +0,0 @@ -### Challenge - -Develop a reference implementation of any MLPerf inference benchmark to run on Amazon Inferentia. -Submit preliminary (unoptimized) benchmarking results to MLPerf inference v3.1 and beyond. - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. - - -### Prizes - -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. 
- - - -### Organizers - -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at official [MLCommons website](https://mlcommons.org). diff --git a/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json deleted file mode 100644 index 66431963a5..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-amazon-inferentia-2023/_cm.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-amazon-inferentia-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230817", - "date_close_extension": true, - "date_open": "20230704", - "points":3, - "trophies":true, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "benchmark", - "automate", - "inferentia", - "mlperf-inference", - "mlperf-inference-inferentia", - "mlperf-inference-inferentia", - "mlperf-inference-inferentia-v3.1", - "mlperf-inference-inferentia-v3.1-2023", - "v3.1" - ], - "title": "Develop a reference implementation of any MLPerf inference benchmark to run on Amazon Inferentia and submit to MLPerf inference v3.1+", - "uid": "c8f2573320424e2a" -} diff --git a/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md deleted file mode 100644 index c08847da6a..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/README.md +++ /dev/null @@ -1,20 +0,0 @@ -### Challenge - -Create any 
end-to-end AI application with web cam, speech recognition, chat bot, LLM -that uses any MLPerf model and CM automation. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. - -Looking forward to your submissions and happy hacking! - -### Prizes - -* *All submitters will receive 1 point for submitting valid results for 1 complete benchmark on one system.* -* *All submitters will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* - - -### Organizers - -* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning foundation](https://cTuning.org) - diff --git a/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json b/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json deleted file mode 100644 index 23fb64d835..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-create-end-to-end-app/_cm.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-create-end-to-end-app", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_open": "20230704", - "date_close_extension": true, - "points":3, - "trophies":true, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "end-to-end-app", - "mlperf-inference", - "mlperf-inference-end-to-end-app", - "mlperf-inference-end-to-end-app", - "mlperf-inference-end-to-end-app-v3.1", - "mlperf-inference-end-to-end-app-v3.1-2023", - "v3.1" - ], - "title": "Generate end-to-end optimized AI apps (LLM, speech, etc) based on MLPerf inference results (with and without container)", - "uid": "96ca61a5aa914063" -} diff --git a/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md b/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md deleted file mode 100644 index 
f0f8908d29..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-deepsparse/README.md +++ /dev/null @@ -1,31 +0,0 @@ -### Challenge - -Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 using -CM automation language with the DeepSparse library, any model and any platform. - -Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. - -### Prizes - -* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. - -### Organizers - -* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning foundation](https://cTuning.org) -* [cKnowledge Ltd](https://cKnowledge.org) - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json b/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json deleted file mode 100644 index e1cc4f8880..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-deepsparse/_cm.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-deepsparse", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230817", - "date_close_extension": true, - "date_open": "20230704", - "experiments": [], - "points": 1, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "deepsparse", - "mlperf-inference", - "mlperf-inference-deepsparse", - "mlperf-inference-deepsparse", - "mlperf-inference-deepsparse-v3.1", - "mlperf-inference-deepsparse-v3.1-2023", - "v3.1" - ], - "title": "Run and optimize MLPerf inference v3.1 benchmarks with Neural Magic's DeepSparse library", - "trophies": true, - "uid": "c495863b08e74abc" -} diff --git a/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md deleted file mode 100644 index 94fad05b51..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/README.md +++ /dev/null @@ -1,32 +0,0 @@ -### Challenge - -Develop a reference implementation of any MLPerf inference benchmark to run on the latest publicly available Google TPU. -Submit preliminary (unoptimized) benchmarking results to MLPerf inference v3.1 and beyond. - -Note that you can use either GCP TPU or Coral TPU USB-Accelerator CPU card. -In the latter case, you can reuse and extend our CM-MLPerf script for MobileNets! - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. 
- -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. - -### Prizes - -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. - -### Organizers - -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json deleted file mode 100644 index 3d5aecc950..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-google-tpu-2023/_cm.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-google-tpu-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230817", - "date_open": "20230704", - "points":3, - "trophies":true, - "date_close_extension": true, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "tpu", - "mlperf-inference", - "mlperf-inference-tpu", - "mlperf-inference-tpu", - "mlperf-inference-tpu-v3.1", - "mlperf-inference-tpu-v3.1-2023", - "v3.1" - ], - "title": "Develop a reference implementation of any MLPerf inference benchmark to run on the latest publicly available Google TPU (GCP or Coral USB accelerator) and submit to MLPerf inference v3.1+", - "uid": "5975fd0e18cd4073" -} diff --git a/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md deleted file mode 100644 index 014f83f7d9..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/README.md +++ /dev/null @@ -1,52 +0,0 @@ -### Introduction - -Open-source [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) -were developed by a [consortium of 50+ companies and universities (MLCommons)](https://mlcommons.org) -to enable trustable and reproducible comparison of AI/ML systems -in terms of latency, throughput, power consumption, accuracy and other metrics -across diverse software/hardware stacks from different vendors. - -However, it is difficult to customize and run MLPerf benchmarks with non-reference models. 
- -That's why the MLCommons Task Force on automation and reproducibility has developed -a [Collective Mind automation language](https://doi.org/10.5281/zenodo.8144274) -to modularize this benchmark and make it easier to run with different models and data sets. - - -### Challenge - -Implement a CM workflow to connect any Hugging Face model -to MLPerf loadgen and run it with random inputs to obtain preliminary latency and throughput -without accuracy. - -Resources: -* [CM script to get ML model from Hugging Face zoo](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-ml-model-huggingface-zoo) -* [CM script to convert Hugging Face model to ONNX](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/convert-ml-model-huggingface-to-onnx) -* [CM script to build MLPerf loadgen](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-mlperf-inference-loadgen) -* [CM script to run Python Loadgen with any ONNX model](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/app-loadgen-generic-python/README-extra.md) -* [MLPerf BERT FP32 model is available at Hugging Face](https://huggingface.co/ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1) - -Some results showcasing the CK workflow to benchmark Hugging Face models with MLPerf from v3.0 (BERT): -* https://access.cknowledge.org/playground/?action=experiments&name=2f1f70d8b2594149 -* https://access.cknowledge.org/playground/?action=experiments&name=mlperf-inference--v3.0--edge--open-power--language-processing--offline&result_uid=9d2594448bbb4b45 - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. 
- -### Prizes - -* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. - - -### Organizers - -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - - diff --git a/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json deleted file mode 100644 index 146505b55a..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-hugging-face-models-2023/_cm.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-hugging-face-models-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230817", - "date_close_extension": true, - "date_open": "20230704", - "points":3, - "trophies":true, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "huggingface", - "mlperf-inference", - "mlperf-inference-huggingface", - "mlperf-inference-huggingface", - "mlperf-inference-huggingface-v3.1", - "mlperf-inference-huggingface-v3.1-2023", - "v3.1" - ], - "title": "Implement CM automation to run benchmark Hugging Face models using MLPerf loadgen", - "uid": "72b95d08a9e04698" -} diff --git a/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md deleted file mode 100644 index aec0514730..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-intel-2023/README.md +++ /dev/null @@ 
-1,31 +0,0 @@ -### Challenge - -Add CM interface to run MLPerf inference benchmarks on Intel-based platforms. - -You can start from reproducing any past MLPerf inference submission from Intel and their partners -and then adding CM automation. - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. - -### Prizes - -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. - - -### Organizers - -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json deleted file mode 100644 index c3d9adbe4c..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-intel-2023/_cm.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-intel-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20240104", - "date_open": "20230704", - "points": 2, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "intel", - "mlperf-inference", - "mlperf-inference-intel", - "mlperf-inference-intel", - "mlperf-inference-intel-v3.1", - "mlperf-inference-intel-v3.1-2023", - "v3.1" - ], - "title": "Add the CM interface to run MLPerf inference benchmarks on Intel-based platforms", - "trophies": true, - "uid": "1c1d5da6766f4afb" -} diff --git a/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md deleted file mode 100644 index 6aaf4e3947..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/README.md +++ /dev/null @@ -1,34 +0,0 @@ -### Challenge - -Add support to run a reference implementation of any MLPerf inference benchmark using -[Mojo language]( https://github.com/modularml/mojo ) -from [Modular.ai](https://modular.ai). - -Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 with Mojo. - -Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. 
- -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. - -### Prizes - -* *The first implementation will receive a cache prize from organizers.* -* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. - -### Organizers - -* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning foundation](https://cTuning.org) -* [cKnowledge Ltd](https://cKnowledge.org) - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json deleted file mode 100644 index e805879dee..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-modular-mojo-2023/_cm.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-modular-mojo-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230817", - "date_close_extension": true, - "date_open": "20230704", - "experiments": [], - "points": 1, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "mojo", - "mlperf-inference", - "mlperf-inference-mojo", - "mlperf-inference-mojo", - "mlperf-inference-mojo-v3.1", - "mlperf-inference-mojo-v3.1-2023", - "v3.1" - ], - "title": "Run reference implementations of MLperf inference benchmarks using Mojo language from Modular.ai", - "trophies": true, - "uid": "0a8a7bb5572447db" -} diff --git a/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md deleted file mode 100644 index c16a9335a6..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/README.md +++ /dev/null @@ -1,33 +0,0 @@ -### Challenge - -Add CM interface to run MLPerf inference benchmarks on Qualcomm AI100-based platforms. - -You can start from reproducing any past submission from Dell, Lenovo or HPE -and then adding CM automation. - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. 
- -### Prizes - -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. - - - -### Organizers - -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at official [MLCommons website](https://mlcommons.org). diff --git a/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json deleted file mode 100644 index 07c626e259..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-qualcomm-ai100-2023/_cm.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-qualcomm-ai100-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20240104", - "date_open": "20230704", - "points":3, - "trophies":true, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "ai100", - "mlperf-inference", - "mlperf-inference-ai100", - "mlperf-inference-ai100", - "mlperf-inference-ai100-v3.1", - "mlperf-inference-ai100-v3.1-2023", - "v3.1" - ], - "title": "Add the CM interface to run MLPerf inference benchmarks on Qualcomm AI100-based platforms", - "uid": "09bd5f9e05ff46b1" -} diff --git a/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md 
b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md deleted file mode 100644 index f8d9fbd71b..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-tvm-2023/README.md +++ /dev/null @@ -1,41 +0,0 @@ -### Challenge - -Prepare, optimize and submit benchmarking results to MLPerf inference v3.1 using -CM automation language with Apache TVM, any model and any platform. - -Check [this related challenge](https://access.cknowledge.org/playground/?action=challenges&name=3e971d8089014d1f) for more details. - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. - -### Prizes - -* *All contributors will receive 1 point for submitting valid results for 1 complete benchmark on one system.* -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. - -### Organizers - -* [Deelvin](https://deelvin.com) -* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning foundation](https://cTuning.org) -* [cKnowledge Ltd](https://cKnowledge.org) - -### Status - -This challenge is under preparation. 
- -* https://github.com/mlcommons/ck/pull/693 -* https://github.com/mlcommons/ck/pull/700 -* https://github.com/mlcommons/ck/pull/701 - - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at official [MLCommons website](https://mlcommons.org). diff --git a/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json deleted file mode 100644 index 839fb6b86e..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-tvm-2023/_cm.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-tvm-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230817", - "date_close_extension": true, - "date_open": "20230704", - "points":1, - "trophies":true, - "experiments": [], - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "tvm", - "mlperf-inference", - "mlperf-inference-tvm", - "mlperf-inference-tvm", - "mlperf-inference-tvm-v3.1", - "mlperf-inference-tvm-v3.1-2023", - "v3.1" - ], - "title": "Run and optimize MLPerf inference v3.1 benchmarks with Apache TVM", - "uid": "29c416e245884746" -} diff --git a/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md deleted file mode 100644 index 0a5fe9aa2c..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/README.md +++ /dev/null @@ -1,31 +0,0 @@ -### Challenge - -Add more models and hardware backends to the [universal C++ implementation of MLPerf inference benchmarks)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/app-mlperf-inference-cpp) 
-being developed by the [MLCommons Task Force on Automation and Reproducibility](https://github.com/mlcommons/ck/blob/master/docs/taskforce.md). - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. - - -### Prizes - -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. - - - -### Organizers - -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json deleted file mode 100644 index e4e5cae105..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023/_cm.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-universal-cpp-implementation-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230817", - "date_close_extension": true, - "date_open": "20230704", - "points": 2, - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "cpp", - "mlperf-inference", - "mlperf-inference-cpp", - "mlperf-inference-cpp", - "mlperf-inference-cpp-v3.1", - "mlperf-inference-cpp-v3.1-2023", - "v3.1" - ], - "title": "Add more models and hardware backends to the universal C++ implementation of MLPerf inference benchmarks from MLCommons", - "trophies": true, - "uid": "518420b0e6dd4fed" -} diff --git a/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md b/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md deleted file mode 100644 index d587f62f89..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-windows-2023/README.md +++ /dev/null @@ -1,36 +0,0 @@ -### Challenge - -Prepare, optimize and submit any benchmarking results to MLPerf inference v3.1 using -CM automation language on Windows. - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. 
- - -### Prizes - -* *All contributors will receive an official MLCommons Collective Knowledge contributor award (see [this example](https://ctuning.org/awards/ck-award-202307-zhu.pdf)).* -* *The top contributors will receive cash prizes from [MLCommons organizations](https://mlcommons.org) and [cKnowledge.org](https://www.linkedin.com/company/cknowledge)*. - - -### Organizers - -* [MLCommons](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - - -### Status - -Open ticket: [GitHub](https://github.com/mlcommons/ck/issues/696) - - -### Results - -All accepted results will be publicly available in the CM format with derived metrics -in this [MLCommons repository](https://github.com/mlcommons/cm4mlperf-results), -in [MLCommons Collective Knowledge explorer](https://access.cknowledge.org/playground/?action=experiments) -and at official [MLCommons website](https://mlcommons.org). 
diff --git a/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json b/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json deleted file mode 100644 index 1a55dcbe0f..0000000000 --- a/challenge/optimize-mlperf-inference-v3.1-windows-2023/_cm.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "alias": "optimize-mlperf-inference-v3.1-windows-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230817", - "date_close_extension": true, - "date_open": "20230704", - "points":2, - "trophies":true, - "experiments": [], - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "windows", - "mlperf-inference", - "mlperf-inference-windows", - "mlperf-inference-windows", - "mlperf-inference-windows-v3.1", - "mlperf-inference-windows-v3.1-2023", - "v3.1" - ], - "title": "Run and optimize MLPerf inference v3.1 benchmarks on Windows", - "uid": "53e56d714c7649c7" -} diff --git a/challenge/repro-mlperf-inf-v3.0-orin/README.md b/challenge/repro-mlperf-inf-v3.0-orin/README.md deleted file mode 100644 index 54dd4feeb0..0000000000 --- a/challenge/repro-mlperf-inf-v3.0-orin/README.md +++ /dev/null @@ -1,16 +0,0 @@ -### Challenge - -Reproduce MLPerf inference v3.0 benchmark results for Nvidia Jetson Orin -(performance, accuracy,power) and automate it using the -[MLCommons CK framework](https://github.com/mlcommons/ck). - -### Organizers - -* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning foundation](https://cTuning.org) -* [cKnowledge](https://cKnowledge.org) - -### Status - -Finished. Preliminary results are available [here](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/optimize-mlperf-inference-v3.0-2023/docs/setup-nvidia-jetson-orin.md). 
- diff --git a/challenge/repro-mlperf-inf-v3.0-orin/_cm.json b/challenge/repro-mlperf-inf-v3.0-orin/_cm.json deleted file mode 100644 index aff0fdba0f..0000000000 --- a/challenge/repro-mlperf-inf-v3.0-orin/_cm.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "alias": "repro-mlperf-inf-v3.0-orin", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230406", - "date_open": "20230301", - "experiments": [ - { - "tags": "mlperf-inference,v3.0" - } - ], - "_password_hash": "$2b$12$ionIRWe5Ft7jkn4y/7C6/eYoo6uBBMkGy/9SxwtKhaDRqZ1w2s3dO", - "tags": [ - "reproduce", - "replicate", - "automate", - "orin", - "nvidia", - "mlperf-inference-v3.0-orin" - ], - "title": "Reproduce MLPerf inference v3.0 results for Nvidia Jetson Orin", - "uid": "6d377c1a1b224636" -} diff --git a/challenge/repro-mlperf-inference-retinanet-scc2022/README.md b/challenge/repro-mlperf-inference-retinanet-scc2022/README.md deleted file mode 100644 index 9917547c15..0000000000 --- a/challenge/repro-mlperf-inference-retinanet-scc2022/README.md +++ /dev/null @@ -1,39 +0,0 @@ -### Challenge - -Reproduce the MLPerf inference RetinaNet benchmark during Student Cluster Competition at SuperComputing'22 -using the following [CM tutorial](https://github.com/mlcommons/ck/blob/master/docs/tutorials/sc22-scc-mlperf.md). - -### Organizers - -* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning foundation](https://cTuning.org) -* [OctoML](https://octoml.ai) - -### Status - -This challenge has been successfully completed. 
- -### Results - -Results from 10 international student teams are available at: -* [W&B dashboard 1 (during SCC'22)](https://wandb.ai/cmind/cm-mlperf-sc22-scc-retinanet-offline/table?workspace=user-gfursin) -* [W&B dashboard 2 (after SCC'22)](https://wandb.ai/cmind/cm-mlperf-dse-testing/table?workspace=user-gfursin) - - -### Acknowledgments - -We thank -[Hai Ah Nam](https://www.nersc.gov/about/nersc-staff/advanced-technologies-group/hai-ah-nam), -[Steve Leak](https://www.linkedin.com/in/steve-leak), -[Vijay Janappa Reddi](https://scholar.harvard.edu/vijay-janapa-reddi/home), -[Tom Jablin](https://scholar.google.com/citations?user=L_1FmIMAAAAJ&hl=en), -[Ramesh N Chukka](https://www.linkedin.com/in/ramesh-chukka-74b5b21), -[Peter Mattson](https://www.linkedin.com/in/peter-mattson-33b8863/), -[David Kanter](https://www.linkedin.com/in/kanterd), -[Pablo Gonzalez Mesa](https://www.linkedin.com/in/pablo-gonzalez-mesa-952ab2207), -[Thomas Zhu](https://www.linkedin.com/in/hanwen-zhu-483614189), -[Thomas Schmid](https://www.linkedin.com/in/tschmid) -and [Gaurav Verma](https://www.linkedin.com/in/grverma) -for their suggestions and contributions. 
- - diff --git a/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json b/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json deleted file mode 100644 index 68352f9c3b..0000000000 --- a/challenge/repro-mlperf-inference-retinanet-scc2022/_cm.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "alias": "repro-mlperf-inference-retinanet-scc2022", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20221201", - "date_open": "20221101", - "tags": [ - "modularize", - "reproduce", - "replicate", - "automate", - "benchmark", - "mlperf", - "mlperf-inference", - "mlperf-inference-scc", - "mlperf-inference-scc-2022" - ], - "title": "Automate MLPerf RetinaNet benchmark at the Student Cluster Competition at SuperComputing'22 using CM", - "uid": "e71fa8b396874e68" -} diff --git a/challenge/repro-mlperf-inference-v4.0-2024/README.md b/challenge/repro-mlperf-inference-v4.0-2024/README.md deleted file mode 100644 index af23eb1205..0000000000 --- a/challenge/repro-mlperf-inference-v4.0-2024/README.md +++ /dev/null @@ -1,3 +0,0 @@ -The [MLCommons](https://mlcommons.org), [cTuning foundation](https://cTuning.org) and [cKnowledge.org](https://cKnowledge.org) -are preparing a unified interface to reproduce results from the MLPerf inference benchmark submission v4.0. -Please feel free to join the testing phase using [GitHub issues](https://github.com/mlcommons/ck/issues)! diff --git a/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml b/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml deleted file mode 100644 index 01bcfd52a7..0000000000 --- a/challenge/repro-mlperf-inference-v4.0-2024/_cm.yaml +++ /dev/null @@ -1,25 +0,0 @@ -alias: repro-mlperf-inference-v4.0-2024 -uid: e6b8738383eb46d0 - -automation_alias: challenge -automation_uid: 3d84abd768f34e08 - -title: Reproduce and automate MLPerf inference benchmark results v4.0 from different vendors (Intel, Nvidia, Qualcomm, Google, NeuralMagic, ...) 
using CM - -date_open: '20240201' - -tags: -- modularize -- optimize -- reproduce -- replicate -- automate -- benchmark -- mlperf -- mlperf-inference -- mlperf-inference-v4.0 -- mlperf-inference-v4.0-2024 -- v4.0 - -experiments: -- tags: mlperf-inference,v4.0 diff --git a/challenge/repro-mlperf-inference-v4.1-2024/README.md b/challenge/repro-mlperf-inference-v4.1-2024/README.md deleted file mode 100644 index 1aacc2d59d..0000000000 --- a/challenge/repro-mlperf-inference-v4.1-2024/README.md +++ /dev/null @@ -1,4 +0,0 @@ -The [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org) and [MLCommons](https://mlcommons.org) -are preparing an open reproducibility challenge to reproduce various results from the MLPerf inference benchmark v4.1 -using the MLCommons CM automation framework. Please stay tuned for more details! - diff --git a/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml b/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml deleted file mode 100644 index 840d58318d..0000000000 --- a/challenge/repro-mlperf-inference-v4.1-2024/_cm.yaml +++ /dev/null @@ -1,22 +0,0 @@ -alias: repro-mlperf-inference-v4.1-2024 -uid: 2093f4d750144df4 - -automation_alias: challenge -automation_uid: 3d84abd768f34e08 - -title: 'Reproduce the upcoming MLPerf inference benchmark v4.1 results' - -date_open: '20240901' - -tags: -- modularize -- optimize -- reproduce -- replicate -- automate -- benchmark -- mlperf -- mlperf-inference -- mlperf-inference-v4.1 -- mlperf-inference-v4.1-2024 -- v4.1 diff --git a/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md deleted file mode 100644 index 0f59f59f0e..0000000000 --- a/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/README.md +++ /dev/null @@ -1,36 +0,0 @@ -### Challenge - -Reproduce and automate [TinyMLPerf benchmarks](https://github.com/mlcommons/tiny). 
- -### Organizers - -* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning foundation](https://cTuning.org) -* [cKnowledge Ltd](https://cKnowledge.org) - -### Status - -We have successfully reproduced [TinyMLPerf v1.0 submission with microTVM on the STMicroelectronics NUCLEO-L4R5ZI board](https://github.com/mlcommons/tiny_results_v1.0/tree/main/closed/OctoML), -automated it with the latest version of the [MLCommons CM automation language](https://github.com/mlcommons/ck/blob/master/docs/README.md), -submit reproduce results to the TinyMLperf v1.1 round, -and added all past TinyMLPerf results to the [MLCommons CK playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny) -for further collaborative analysis and improvement. - -Pleases check our tutorial and reproducibility report: -* [Automate TinyMLPerf benchmark](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/automate-mlperf-tiny.md) - useful for all SW/HW stacks and submission rounds. -* [Reproduce TinyMLPerf v1.0 submission](https://github.com/ctuning/mlcommons-ck/blob/master/docs/tutorials/reproduce-mlperf-tiny.md). - -TinyMLPerf v1.1 results will be published at te [MLCommons CK playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny) -in mid June 2023. - -### Related discussions for the future - -* https://github.com/mlcommons/ck/pull/693 -* https://github.com/mlcommons/ck/pull/700 -* https://github.com/mlcommons/ck/pull/701 -* https://github.com/mlcommons/ck/issues/606 - -### Results - -All results will be available in [this GitHub repo](https://github.com/ctuning/cm4mlperf-results) -and can be visualized and compared using the [MLCommons Collective Knowledge Playground](https://access.cknowledge.org/playground/?action=experiments&tags=mlperf-tiny). 
diff --git a/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json b/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json deleted file mode 100644 index 4e9e248505..0000000000 --- a/challenge/reproduce-and-automate-tinymlperf-v1.1-2023/_cm.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "alias": "reproduce-and-automate-tinymlperf-v1.1-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230519", - "date_open": "20230501", - "experiments": [], - "tags": [ - "modularize", - "automate", - "reproduce", - "replicate", - "optimize", - "benchmark", - "tinymlperf", - "tinymlperf-inference", - "tinymlperf-inference-v3.0", - "tinymlperf-inference-v3.0-2023", - "v1.0" - ], - "title": "Reproduce and optimize TinyMLPerf inference v1.1 benchmarks", - "uid": "d98cd66e0e5641f7" -} diff --git a/challenge/reproduce-mlperf-training-v3.0-2023/README.md b/challenge/reproduce-mlperf-training-v3.0-2023/README.md deleted file mode 100644 index a1f1ea22ac..0000000000 --- a/challenge/reproduce-mlperf-training-v3.0-2023/README.md +++ /dev/null @@ -1,17 +0,0 @@ -### Challenge - -Prepare, optimize and reproduce MLPerf training v3.0 benchmarks -using the [MLCommons CM (CK2) automation framework](https://github.com/mlcommons/ck) - -### Status - -We could not do a successful submission mainly because the training scripts were not converging on a single GPU. We tried resnet and bert training. The below CM scripts are added to do MLPerf training for BERT using the reference and NVIDIA implementations. - -1. [BERT Training using Nvidia code](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-training-nvidia) -2. 
[BERT Training using MLPerf Reference code](https://github.com/ctuning/mlcommons-ck/tree/master/cm-mlops/script/app-mlperf-training-reference) - -### Organizers - -* [MLCommons taskforce on automation and reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning foundation](https://cTuning.org) -* [cKnowledge](https://cKnowledge.org) diff --git a/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json b/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json deleted file mode 100644 index d1e5eddea8..0000000000 --- a/challenge/reproduce-mlperf-training-v3.0-2023/_cm.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "alias": "reproduce-mlperf-training-v3.0-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230519", - "date_open": "20230501", - "experiments": [], - "tags": [ - "modularize", - "optimize", - "reproduce", - "replicate", - "automate", - "benchmark", - "mlperf", - "mlperf-training", - "mlperf-training-v3.0", - "mlperf-training-v3.0-2023", - "v3.0" - ], - "title": "Reproduce MLPerf training v3.0 benchmarks", - "uid": "1d26149c1cce4da3" -} diff --git a/challenge/run-mlperf@home-v3.1-cpu/README.md b/challenge/run-mlperf@home-v3.1-cpu/README.md deleted file mode 100644 index bd734f7896..0000000000 --- a/challenge/run-mlperf@home-v3.1-cpu/README.md +++ /dev/null @@ -1,67 +0,0 @@ -### Introduction - -The goal of this MLPerf@home challenge is to help the community find -the most efficient CPU (Intel/AMD/Arm) for BERT-99 model with DeepSparse engine -and different variations of MobileNets/EfficientNets with TFLite -in terms of latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics. - -We would like to ask you to run a few [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) -with BERT and MobileNets/EfficientNets on one or more systems with different CPUs -that you have an access to: laptops, servers, cloud instances... 
- -You will be able to run benchmarks, collect all metrics and submit results in an automated way -in a native environment or Docker container using the portable and technology-agnostic -[MLCommons Collective Mind automation language (CM)](https://doi.org/10.5281/zenodo.8105339). - -Your name and benchmark submissions will be published in the official MLCommons inference v3.1 results -on September 1, 2023 (submission deadline: August 4, 2023), -will be published in the [official leaderboard](https://access.cknowledge.org/playground/?action=contributors), -will be included to the prize draw, and will be presented in our upcoming ACM/HiPEAC events. - -Please report encountered problems using [GitHub issues](https://github.com/mlcommons/ck) -to help the community improve CM automation workflows to run MLPerf benchmarks on any system with any software/hardware stack. - -Thank you in advance for helping the community find Pareto-efficient AI/ML Systems! - -### Minimal requirements - -* CPU: Any x86-64 or Arm64 -* OS: - * native: any Linux (tested on Ubuntu 22.04) - * Docker: any OS -* Disk space: - * BERT-99: ~ 20GB - * Different variations of MobileNets/EfficientNets: ~ 140GB -* Time to run: - * BERT-99: ~ 2 hours - * Different variations of MobileNets/EfficientNets: ~ 2 days - -### Instructions to run benchmarks and submit results - -You can run any of these benchmarks or all depending on available time: - -* [Automated Design Space Exploration of MobileNets/EfficientNets; TFLite MLPerf implementation; native environment or Docker](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md) -* [BERT-99 model; DeepSparse MLPerf implementation; native environment](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-cpu/run-cpu-bert-99-deepsparse.md) - -### Results - -All accepted results with submitter names will be publicly available -at the official 
[MLCommons website](https://mlcommons.org) -and in the [Collective Knowledge explorer (MLCommons CK)](https://access.cknowledge.org/playground/?action=experiments) -along with the reproducibility and automation report to help the community -build efficient AI/ML systems. - - -### Organizers - -* [MLCommons Task Force on Automation and Reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - -### Advanced challenges - -If you feel that running these benchmarks was relatively easy, -please try [more advanced challenges](https://access.cknowledge.org/playground/?action=challenges), -read about our [plans and long-term vision](https://doi.org/10.5281/zenodo.8105339), -check [CM documentation](https://github.com/mlcommons/ck/blob/master/docs/README.md) -and run other [MLPerf benchmarks](https://github.com/mlcommons/ck/tree/master/docs/mlperf). diff --git a/challenge/run-mlperf@home-v3.1-cpu/_cm.json b/challenge/run-mlperf@home-v3.1-cpu/_cm.json deleted file mode 100644 index 88f4716cda..0000000000 --- a/challenge/run-mlperf@home-v3.1-cpu/_cm.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "alias": "run-mlperf@home-v3.1-cpu", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230817", - "date_open": "20230725", - "experiments": [], - "points": 2, - "sort": -20, - "tags": [ - "run", - "mlperf", - "inference", - "v3.1", - "mlperf-inference-v3.1-simple-cpu" - ], - "title": "Work with the community to find the most efficient CPUs (Intel/AMD/Arm) for BERT and MobileNets/EfficientNets (latency, throughput, accuracy, number of cores, frequency, memory size, cost and other metrics)", - "skip": true, - "trophies": true, - "uid": "498f33f3dac647c1" -} diff --git a/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md deleted file 
mode 100644 index b4266ffa97..0000000000 --- a/challenge/run-mlperf@home-v3.1-cpu/run-cpu-bert-99-deepsparse.md +++ /dev/null @@ -1,100 +0,0 @@ -# Introduction - -This guide will help you automatically run the MLPerf inference benchmark v3.1 with BERT-99 model and DeepSparse engine -on any Linux-based system with Intel, AMD or Arm CPU. - -This benchmark is automated by the MLCommons CM language and you should be able to submit official MLPerf v3.1 inference results -for offline scenario in open division and edge category. - -It will require ~20GB of disk space and can take ~2 hours to run on 1 system. - - - - -## Install CM automation language - -Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md). -It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. - -If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). - - -## Install repository with CM automations - -Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. -These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license -to enable portable, modular, and technology-agnostic benchmarks and applications -that can automatically run with any software, hardware, models and data sets. - -```bash -cm pull repo mlcommons@ck -``` - -You can run it again at any time to pick up the latest updates. - -Note that CM will store all such repositories and downloaded/installed data sets, models and tools -in your `$HOME/CM` directory. 
- -Since MLPerf benchmarks require lots of space (sometimes hundreds of gigabytes), -you can change the above location to some large scratch disk using the `CM_REPOS` -environment variable as follows: - -```bash -export CM_REPOS={new path to CM repositories and data} -echo "export CM_REPOS=${CM_REPOS}" >> $HOME/.bashrc -cm pull repo mlcommons@ck -``` - - - -## Setup virtual environment - -We suggest that you set up a Python virtual environment via CM to avoid contaminating your existing Python installation: - -```bash -cm run script "install python-venv" --name=mlperf --version_min=3.8 -export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf" -``` - -CM will install a new Python virtual environment in CM cache and will install all Python dependencies there: -```bash -cm show cache --tags=python-venv -``` - -Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache. - -You can clean it at any time and start from scratch using the following command: -```bash -cm rm cache -f -``` - -Alternatively, you can remove specific entries using tags: -```bash -cm show cache -cm rm cache --tags=tag1,tag2,... -``` - - - - -### Do a test run to detect and record the system performance - -```bash -cm run script --tags=generate-run-cmds,inference,_find-performance \ ---model=bert-99 --implementation=reference --device=cpu --backend=deepsparse \ ---category=edge --division=open --quiet --scenario=Offline -``` - -### Do full accuracy and performance run - -``` -cm run script --tags=generate-run-cmds,inference,_submission --model=bert-99 \ ---device=cpu --implementation=reference --backend=deepsparse \ ---execution-mode=valid --results_dir=$HOME/results_dir \ ---category=edge --division=open --quiet --scenario=Offline -``` -### Generate and upload MLPerf submission - -Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/Submission.md) to generate the submission tree and upload your results. 
- - diff --git a/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md b/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md deleted file mode 100644 index f41b1b463b..0000000000 --- a/challenge/run-mlperf@home-v3.1-cpu/run-cpu-dse-mobilenets-efficientnets-tflite.md +++ /dev/null @@ -1,77 +0,0 @@ -# Introduction - -This guide will help you automatically run the MLPerf inference benchmark v3.1 with multiple variations of MobileNets and EfficientNets -and TFLite on any Linux-based system with Intel, AMD or Arm CPU. - -This benchmark is automated by the MLCommons CM language and you should be able to submit official MLPerf v3.1 inference results -for singlestream scenario in open division and edge category. - -It will require ~140GB of disk space and can take ~2 days to run on 1 system producing 243 MLPerf results -during automatic design space exploration to trade off accuracy vs performance. - - - -## Install CM automation language - -Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md). -It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. - -If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). - - -## Install repository with CM automations - -Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. -These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license -to enable portable, modular, and technology-agnostic benchmarks and applications -that can automatically run with any software, hardware, models and data sets. - -```bash -cm pull repo mlcommons@ck -``` - -You can run it again at any time to pick up the latest updates. 
- -Note that CM will store all such repositories and downloaded/installed data sets, models and tools -in your `$HOME/CM` directory. - -Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), -you can change the above location to some large scratch disk using `CM_REPOS` -environment variable as follows: - -```bash -export CM_REPOS={new path to CM repositories and data} -echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" -cm pull repo mlcommons@ck -``` - - - -## Setup virtual environment - -We suggest you to setup a Python virtual environment via CM to avoid contaminating your existing Python installation: - -```bash -cm run script "install python-venv" --name=mlperf --version_min=3.8 -export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf" -``` - -CM will install a new Python virtual environment in CM cache and will install all Python dependencies there: -```bash -cm show cache --tags=python-venv -``` - -Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache. - -You can clean it at any time and start from scratch using the following command: -```bash -cm rm cache -f -``` - -Alternatively, you can remove specific entries using tags: -```bash -cm show cache -cm rm cache --tags=tag1,tag2,... -``` - - diff --git a/challenge/run-mlperf@home-v3.1-gpu/README.md b/challenge/run-mlperf@home-v3.1-gpu/README.md deleted file mode 100644 index b6482d3835..0000000000 --- a/challenge/run-mlperf@home-v3.1-gpu/README.md +++ /dev/null @@ -1,65 +0,0 @@ -### Introduction - -The goal of this MLPerf@home challenge is to help the community find -the most efficient Nvidia GPUs for GPT-J 6B model and BERT-99 in terms of -latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics. 
- -We would like to ask you to run a few [MLPerf inference benchmarks](https://arxiv.org/abs/1911.02549) -with GPT-J and BERT-99 models on one or more systems with different Nvidia GPUs -that you have access to: laptops, servers, cloud instances... - -You will be able to run benchmarks, collect all metrics and submit results in an automated way -in a native environment or Docker container using the portable and technology-agnostic -[MLCommons Collective Mind automation language (CM)](https://doi.org/10.5281/zenodo.8105339). - -Your name and benchmark submissions will be published in the official MLCommons inference v3.1 results -on September 1, 2023 (**submission deadline: August 17, 2023**), -will be published in the [official leaderboard](https://access.cknowledge.org/playground/?action=contributors), -will be included in the prize draw, and will be presented at our upcoming ACM/HiPEAC events. - -Please report any problems you encounter using [GitHub issues](https://github.com/mlcommons/ck) -to help the community improve CM automation workflows to run MLPerf benchmarks on any system with any software/hardware stack. - -Thank you in advance for helping the community find Pareto-efficient AI/ML Systems! 
- -### Minimal requirements - -* GPU: Nvidia -* GPU memory: - * GPT-J 6B: min 24GB - * BERT-99: min 8..16GB -* OS: - * native: any Linux (tested on Ubuntu 22.04) - * Docker: any OS - any Linux (tested on Ubuntu 22.04) -* Disk space: ~30GB per model/data set -* Time to run: - * GPT-J 6B: ~ 1 day - * BERT-99: ~ 2 hours - -### Instructions to run benchmarks and submit results - -* [GPT-J 6B model (24GB min GPU memory); PyTorch+CUDA; native environment](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md) -* [BERT-99 model (8GB min GPU memory); TensorRT; Docker](https://github.com/mlcommons/ck/blob/master/cm-mlops/challenge/run-mlperf%40home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md) - -### Results - -All accepted results with submitter names will be publicly available -at the official [MLCommons website](https://mlcommons.org) -and in the [Collective Knowledge explorer (MLCommons CK)](https://access.cknowledge.org/playground/?action=experiments) -along with the reproducibility and automation report to help the community -build efficient AI/ML systems. - -### Organizers - -* [MLCommons Task Force on Automation and Reproducibility](https://cKnowledge.org/mlcommons-taskforce) -* [cTuning.org](https://www.linkedin.com/company/ctuning-foundation) -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) - -### Advanced challenges - -If you feel that running these benchmarks was relatively easy, -please try [more advanced challenges](https://access.cknowledge.org/playground/?action=challenges), -read about our [plans and long-term vision](https://doi.org/10.5281/zenodo.8105339), -check [CM documentation](https://github.com/mlcommons/ck/blob/master/docs/README.md) -and run other [MLPerf benchmarks](https://github.com/mlcommons/ck/tree/master/docs/mlperf). 
diff --git a/challenge/run-mlperf@home-v3.1-gpu/_cm.json b/challenge/run-mlperf@home-v3.1-gpu/_cm.json deleted file mode 100644 index af7deeadae..0000000000 --- a/challenge/run-mlperf@home-v3.1-gpu/_cm.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "alias": "run-mlperf@home-v3.1-gpu", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close": "20230817", - "date_open": "20230725", - "experiments": [], - "points": 2, - "sort": -30, - "tags": [ - "run", - "mlperf", - "inference", - "v3.1", - "mlperf-inference-v3.1-simple-cpu" - ], - "title": "Work with the community to find the most efficient Nvidia GPUs for GPT-J 6B model and BERT (latency, throughput, accuracy, number of cores, frequency, memory size, cost, and other metrics)", - "trophies": true, - "uid": "54230c3b66564cef" -} diff --git a/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md deleted file mode 100644 index f543c23621..0000000000 --- a/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-bert-99-nvidia-docker-tensorrt.md +++ /dev/null @@ -1,193 +0,0 @@ -# Introduction - -This guide will help you run the Nvidia implementation of the MLPerf inference benchmark v3.1 -with BERT-99 model and TensorRT on any Linux-based system with Nvidia GPU (8..16GB min memory required) -and Docker. - -This benchmark is semi-automated by the [MLCommons CM language](https://doi.org/10.5281/zenodo.8105339) -and you should be able to submit official MLPerf v3.1 inference results -for all scenarios in closed division and edge category -(**deadline to send us results for v3.1 submission: August 3, 2023**). - - -It will require ~30GB of disk space and can take ~2 hours to run on 1 system. - - -## Install CM automation language - -Install the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339) as described in this [guide](../../../docs/installation.md). 
-It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. - -If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). - - -## Install repository with CM automations - -Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. -These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license -to enable portable, modular, and technology-agnostic benchmarks and applications -that can automatically run with any software, hardware, models and data sets. - -```bash -cm pull repo mlcommons@ck -``` - -You can run it again at any time to pick up the latest updates. - -Note that CM will store all such repositories and downloaded/installed data sets, models and tools -in your `$HOME/CM` directory. - -Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), -you can change the above location to some large scratch disk using `CM_REPOS` -environment variable as follows: - -```bash -export CM_REPOS={new path to CM repositories and data} -echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" -cm pull repo mlcommons@ck -``` - - - -## Setup CUDA and Docker container - -### Download CUDA 11.8 - -Nvidia recommends the following version of CUDA to be used with their MLPerf inference implementation: - -``` -wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run -``` - -However, you are very welcome to try another version! - -### Download cuDNN, TensorRT - -For x86 machines, please download the following TAR files: -1. [cuDNN](https://developer.nvidia.com/cudnn) - note that Nvidia recommends `cudnn-linux-x86_64-8.9.2.26_cuda11-archive.tar.xz` - but you are welcome to try another version -2. 
[TensorRT](https://developer.nvidia.com/tensorrt) - note that Nvidia recommends `TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz` - but you can try another version - - -### Set up Nvidia Docker container with MLPerf benchmarks - -1. [Install Docker](https://docs.docker.com/engine/install/) and [Nvidia container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) - -2. Give Docker permission to the current user - ``` - sudo usermod -aG docker $USER - ``` - Logout and login - Restart docker if required and confirm that Nvidia container toolkit is working by - ``` - nvidia-ctk --version - ``` - -3. Check if Nvidia driver is working properly on the host. - ``` - nvidia-smi - ``` - If the above command produces any error you'll need to install Nvidia drivers on the host. You can do this via CM if you have sudo access - ``` - cmr "install cuda prebuilt _driver" --version=11.8.0 - ``` - - -4. Build the docker container and mount the paths from the host machine. - - *You may need to change --cuda_run_file_path, --tensorrt_tar_file_path and --cudnn_tar_file_path if you downloaded other versions than recommended by Nvidia.* - - *You may want to change the `scratch_path` location as it can take 100s of GBs.* - - ```bash - cm docker script --tags=build,nvidia,inference,server \ - --cuda_run_file_path=$HOME/cuda_11.8.0_520.61.05_linux.run \ - --tensorrt_tar_file_path=$HOME/TensorRT-8.6.1.6.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - --cudnn_tar_file_path=$HOME/cudnn-linux-x86_64-8.9.2.26_cuda11-archive.tar.xz \ - --scratch_path=$HOME/mlperf_scratch \ - --docker_cm_repo=mlcommons@ck \ - --results_dir=$HOME/results_dir \ - --submission_dir=$HOME/submission_dir \ - --adr.compiler.tags=gcc - ``` - -5. At the end of the build you'll get a prompt - please enter your system name such as "aws_nvidia_t4" - (note that space, `-` and other special characters are not allowed), - and say `yes` to generating the configuration files. 
- - ``` - ============================================ - => A system ID is a string containing only letters, numbers, and underscores - => that is used as the human-readable name of the system. It is also used as - => the system name when creating the measurements/ and results/ entries. - => This string should also start with a letter to be a valid Python enum member name. - => Specify the system ID to use for the current system: phoenix - => Reloaded system list. MATCHED_SYSTEM: KnownSystem.phoenix - => This script will generate Benchmark Configuration stubs for the detected system. - Continue? [y/n]: y - ``` - Now you'll be inside the CM Nvidia docker container and can access Nvidia implementations of MLPerf inference benchmarks. - -6. Once the build is complete, you can run Nvidia implementations of MLPerf inference benchmarks - using the unified CM interface. - - You can also save the container at this stage using [Docker commit](https://docs.docker.com/engine/reference/commandline/commit/) - so that it can be launched later without having to go through the previous steps. 
- -### Do a test run to detect and record the system performance - -``` -cmr "generate-run-cmds inference _find-performance _all-scenarios" \ - --model=bert-99 \ - --implementation=nvidia-original \ - --device=cuda \ - --backend=tensorrt \ - --category=edge \ - --division=closed \ - --test_query_count=1000 \ - --quiet -``` - -### Do full accuracy and performance runs - -``` -cmr "generate-run-cmds inference _submission _all-scenarios" \ - --model=bert-99 \ - --device=cuda \ - --implementation=nvidia-original \ - --backend=tensorrt \ - --execution-mode=valid \ - --results_dir=$HOME/results_dir \ - --category=edge \ - --division=closed \ - --quiet -``` - -* `--offline_target_qps` and `--singlestream_target_latency` can be used to override the determined performance numbers - -### Populate the README files describing your submission - -``` -cmr "generate-run-cmds inference _populate-readme _all-scenarios" \ - --model=bert-99 \ - --device=cuda \ - --implementation=nvidia-original \ - --backend=tensorrt \ - --execution-mode=valid \ - --results_dir=$HOME/results_dir \ - --category=edge \ - --division=closed \ - --quiet -``` - -### Generate and upload MLPerf submission - -Follow [this guide](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/Submission.md) to generate the submission tree and upload your results. - - -## Questions? Suggestions? - -Please follow the [cTuning foundation](https://cTuning.org), [cKnowledge.org](https://cKnowledge.org) -and [MLCommons](https://mlcommons.org). 
diff --git a/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md b/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md deleted file mode 100644 index 39b1cc0de2..0000000000 --- a/challenge/run-mlperf@home-v3.1-gpu/run-nvidia-gpu-gpt-j-6b-ref-pytorch.md +++ /dev/null @@ -1,314 +0,0 @@ -# Introduction - -This guide will help you run the reference implementation of the MLPerf inference benchmark v3.1 -with GPT-J 6B model and PyTorch on any Linux-based system with Nvidia GPU (24GB min memory required) -using the [MLCommons CM automation language](https://doi.org/10.5281/zenodo.8105339). - -CM will help you to obtain performance and accuracy numbers for GPT-J 6B model on your system -for the SingleStream scenario and submit them to the official MLPerf v3.1 inference benchmarking round -in open division and edge category -(**deadline to send us results for v3.1 submission: August 3, 2023**). - -You can read more about scenarios, divisions and categories of MLPerf inference benchmarks -in this [MLPerf inference benchmark paper](https://arxiv.org/abs/1911.02549) - -our goal is to help the community compare performance, accuracy and other metrics of popular models across diverse systems -in an automated, unified and reproducible way! - -This benchmark will require ~30GB of disk space and can take ~1 day to run on one system -to have a valid MLPerf result. - - - -## Install CM automation language - -Install the [MLCommons CM automation language](https://github.com/mlcommons/ck) as described in this [guide](../../../docs/installation.md). -It is a small Python library with `cm` and `cmr` command line front-ends and minimal dependencies including Python 3+, Git and wget. - -If you encounter problems, please report them at [GitHub](https://github.com/mlcommons/ck/issues). 
- - -## Install repository with CM automations - -Install the MLCommons repository with [reusable and portable automation recipes (CM scripts)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script) via CM. -These scripts are being developed and shared by the community and MLCommons under Apache 2.0 license -to enable portable, modular, and technology-agnostic benchmarks and applications -that can automatically run with any software, hardware, models and data sets. - -```bash -cm pull repo mlcommons@ck -``` - -You can run it again at any time to pick up the latest updates. - -Note that CM will store all such repositories and downloaded/installed data sets, models, and tools -in your `$HOME/CM` directory. - -Since MLPerf benchmarks require lots of space (somethings hundreds of Gigabytes), -you can change the above location to some large scratch disk using `CM_REPOS` -environment variable as follows: - -```bash -export CM_REPOS={new path to CM repositories and data} -echo "CM_REPOS=${CM_REPOS} >> $HOME/.bashrc" -cm pull repo mlcommons@ck -``` - - - -## Setup virtual environment - -We suggest you to setup a Python virtual environment via CM to avoid contaminating your existing Python installation: - -```bash -cm run script "install python-venv" --name=mlperf --version_min=3.8 -export CM_SCRIPT_EXTRA_CMD="--adr.python.name=mlperf" -``` - -CM will install a new Python virtual environment in CM cache and will install all Python dependencies there: -```bash -cm show cache --tags=python-venv -``` - -Note that CM downloads and/or installs models, data sets, packages, libraries and tools in this cache. - -You can clean it at any time and start from scratch using the following command: -```bash -cm rm cache -f -``` - -Alternatively, you can remove specific entries using tags: -```bash -cm show cache -cm rm cache --tags=tag1,tag2,... 
-``` - - -## Do the performance run - -Now you can run MLPerf inference benchmark to measure performance of GPT-J using CM command as follows -(note that `cmr` is equivalent to `cm run script`): - -```bash -cm run script --tags=generate-run-cmds,inference,_performance-only \ - --division=open \ - --category=edge \ - --model=gptj-99 \ - --precision=bfloat16 \ - --device=cuda \ - --implementation=reference \ - --backend=pytorch \ - --scenario=SingleStream \ - --env.GPTJ_BEAM_SIZE=1 \ - --execution-mode=valid \ - --results_dir=$HOME/results_dir \ - --quiet -``` - -Note that this command will need to automatically download the model (24GB) -and [CNN Daily Mail dataset (relatively small)](https://github.com/mlcommons/ck/tree/master/cm-mlops/script/get-dataset-cnndm)! - -The benchmark run is expected to finish within 10-100 minutes depending on the performance of your GPU. - -In the end of the valid run, you should see [output](https://github.com/ctuning/mlperf_inference_submissions_v3.1/blob/main/open/cTuning/results/amd_zen4_workstation-reference-gpu-pytorch-v2.0.1-default_config/gptj-99/singlestream/performance/run_1/mlperf_log_summary.txt) similar to - -```txt -================================================ -MLPerf Results Summary -================================================ -SUT name : PySUT -Scenario : SingleStream -Mode : PerformanceOnly -90th percentile latency (ns) : 4751920830 -Result is : VALID - Min duration satisfied : Yes - Min queries satisfied : Yes - Early stopping satisfied: Yes -Early Stopping Result: - * Processed at least 64 queries (201). - * Would discard 9 highest latency queries. - * Early stopping 90th percentile estimate: 5387449249 - * Not enough queries processed for 99th percentile - early stopping estimate (would need to process at - least 662 total queries). 
- -================================================ -Additional Stats -================================================ -QPS w/ loadgen overhead : 0.33 -QPS w/o loadgen overhead : 0.33 - -Min latency (ns) : 881803157 -Max latency (ns) : 5939081711 -Mean latency (ns) : 3008773902 -50.00 percentile latency (ns) : 2788885477 -90.00 percentile latency (ns) : 4751920830 -95.00 percentile latency (ns) : 5307244203 -97.00 percentile latency (ns) : 5677375096 -99.00 percentile latency (ns) : 5927209480 -99.90 percentile latency (ns) : 5939081711 - -================================================ -Test Parameters Used -================================================ -samples_per_query : 1 -target_qps : 2000 -target_latency (ns): 0 -max_async_queries : 1 -min_duration (ms): 600000 -max_duration (ms): 620000 -min_query_count : 100 -max_query_count : 0 -qsl_rng_seed : 148687905518835231 -sample_index_rng_seed : 520418551913322573 -schedule_rng_seed : 811580660758947900 -accuracy_log_rng_seed : 0 -accuracy_log_probability : 0 -accuracy_log_sampling_target : 0 -print_timestamps : 0 -performance_issue_unique : 0 -performance_issue_same : 0 -performance_issue_same_index : 0 -performance_sample_count : 13368 - -No warnings encountered during test. - -No errors encountered during test. -``` - - -## Do the accuracy run - -```bash -cm run script --tags=generate-run-cmds,inference,_accuracy-only \ - --division=open \ - --category=edge \ - --model=gptj-99 \ - --precision=bfloat16 \ - --device=cuda \ - --implementation=reference \ - --backend=pytorch \ - --scenario=SingleStream \ - --env.GPTJ_BEAM_SIZE=1 \ - --execution-mode=valid \ - --results_dir=$HOME/results_dir \ - --quiet -``` - -This accuracy run can take many hours (typically 12..46 hours). You can estimate it using the QPS (queries per second) -from the previous performance run as follows: - -accuracy time = data set / QPS = 13368 / QPS . 
- -For example, if your reported QPS is 0.1 (equivalent to 10000 ms latency), it will take 13368/0.1 ~ 37 hours. - - - -## Populate the MLPerf README files describing your submission - -Now you can use CM to automatically populate README files mandated by MLPerf to describe your submission -(we also show you a simpler syntax of `cmr` instead of `cm run script --tags=`): - -```bash -cmr "generate-run-cmds inference _populate-readme" \ - --division=open \ - --category=edge \ - --model=gptj-99 \ - --precision=bfloat16 \ - --device=cuda \ - --implementation=reference \ - --backend=pytorch \ - --scenario=SingleStream \ - --env.GPTJ_BEAM_SIZE=1 \ - --execution-mode=valid \ - --results_dir=$HOME/results_dir \ - --quiet -``` - - -## Generate MLPerf submission - -Unless your organization is an official member of MLCommons, you will be able to participate in the official MLPerf inference community submission -via the cTuning foundation (founding member of MLCommons). - -You should update the following flags in the below CM command: -* Use `--hw_notes_extra` option to add your name to the submission such as `--hw_notes_extra="Result taken by NAME" `. -* Use `--hw_name="My system name"` to give a meaningful system name describing your GPU. - Examples can be seen [here](https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning/systems). -* Use `--submitter=` if your organization is an official MLCommons member and you would like to submit under your organization. - -You should use the master branch of MLCommons inference repo for the submission checker: - -```bash -cmr "generate inference submission" \ - --clean \ - --submitter=cTuning \ - --results_dir=$HOME/results_dir/valid_results \ - --submission_dir=$HOME/inference_submission_tree \ - --preprocess_submission=yes \ - --adr.compiler.tags=gcc \ - --adr.inference-src.version=master \ - --run-checker -``` - -## Push the results to GitHub repo - -1. 
Create a fork of [this cTuning repo with the community results](https://github.com/ctuning/mlperf_inference_submissions_v3.1). - -2. Run the following command after replacing `--repo_url` with your fork URL. - - ``` - cmr "push github mlperf inference submission" \ - --submission_dir=$HOME/inference_submission_tree \ - --repo_url=https://github.com/ctuning/mlperf_inference_submissions_v3.1/ \ - --commit_message="GPTJ results on {system name} added by {your name}" - ``` - -3. Create a PR to the [cTuning repo with the community results](https://github.com/ctuning/mlperf_inference_submissions_v3.1) - - - - - - - - - -## Additional performance optimization challenge for interested enthusiasts - -The MLPerf GPT-J inference benchmark is implemented in this [backend.py](https://github.com/mlcommons/inference/blob/master/language/gpt-j/backend.py). - -It is automatically installed and cached by CM. You can find it on your system using this command: -```bash -cd `cm find cache --tags=inference,src,_branch.master`/language/gpt-j -ls backend.py -``` - -The original model is available at the [Hugging Face Zoo](https://huggingface.co/EleutherAI/gpt-j-6b). It was fine-tuned by Intel for this benchmark -and is available at the MLCommons cloud. It is automatically downloaded by CM using [this script](https://github.com/mlcommons/ck/blob/master/cm-mlops/script/get-ml-model-gptj/_cm.json). - -You can try to improve the performance (QPS) of this code or fine-tune the model and substitute the default one -in [this line](https://github.com/mlcommons/inference/blob/master/language/gpt-j/backend.py#L27). - -Some examples of fine-tuning can be seen [here](https://betterprogramming.pub/fine-tuning-gpt-j-6b-on-google-colab-or-equivalent-desktop-or-server-gpu-b6dc849cb205). - -Any better performance or accuracy result will be very valuable to the community. - -After any modification, you can redo a quick performance run to see the performance difference. 
-``` -cm run script --tags=generate-run-cmds,inference,_performance-only \ - --division=open \ - --category=edge \ - --model=gptj-99 \ - --precision=bfloat16 \ - --device=cuda \ - --implementation=reference \ - --backend=pytorch \ - --scenario=SingleStream \ - --env.GPTJ_BEAM_SIZE=1 \ - --execution-mode=valid \ - --results_dir=$HOME/results_dir \ - --quiet -``` - - - diff --git a/challenge/train-llm-for-cm-mlperf-2023/README.md b/challenge/train-llm-for-cm-mlperf-2023/README.md deleted file mode 100644 index 4e9f6cf178..0000000000 --- a/challenge/train-llm-for-cm-mlperf-2023/README.md +++ /dev/null @@ -1,20 +0,0 @@ -### Challenge - -Improve the prototype of our LLM-based assistant to suggest users how to run MLPerf inference benchmarks -using the MLCommons CM automation language: https://access.cknowledge.org/assistant . - -Read [this documentation](https://github.com/mlcommons/ck/blob/master/docs/mlperf/inference/README.md) -to run reference implementations of MLPerf inference benchmarks -using the CM automation language and use them as a base for your developments. - -Check [this ACM REP'23 keynote](https://doi.org/10.5281/zenodo.8105339) to learn more about our open-source project and long-term vision. 
- - -### Prizes - -* *Get in touch with organizers for more info!* - - -### Organizers - -* [cKnowledge.org](https://www.linkedin.com/company/cknowledge) diff --git a/challenge/train-llm-for-cm-mlperf-2023/_cm.json b/challenge/train-llm-for-cm-mlperf-2023/_cm.json deleted file mode 100644 index ce6009db37..0000000000 --- a/challenge/train-llm-for-cm-mlperf-2023/_cm.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "alias": "train-llm-for-cm-mlperf-2023", - "automation_alias": "challenge", - "automation_uid": "3d84abd768f34e08", - "date_close_extension": true, - "date_open": "20230704", - "experiments": [], - "points": 3, - "tags": [ - "train", - "improve", - "llm", - "assistant", - "mlperf-llm", - "mlperf-llm-assistant", - "mlperf-assistant" - ], - "title": "Train and improve LLM to suggest users how to run MLPerf inference benchmarks using CM automation language", - "trophies": true, - "uid": "d37bf37a24c44ec3" -} diff --git a/report/mlperf-inference-v3.1-analysis-ctuning/README.md b/report/mlperf-inference-v3.1-analysis-ctuning/README.md deleted file mode 100644 index 9d4b696949..0000000000 --- a/report/mlperf-inference-v3.1-analysis-ctuning/README.md +++ /dev/null @@ -1,93 +0,0 @@ -On this page, we highlight some of the exciting submissions done by CTuning for the MLCommons Inference 3.1 round. - -## Top Results in Edge Category - -In the edge category, Rigel Supercomputers from One Stop Systems achieved the peak offline performance for the four submitted benchmarks - Image classification (ResNet50), Object detection (RetinaNet), Language processing (Bert) and Speech Recognition (RNNT). The below graph compares the peak performance of bert-99 model among the top 10 performing systems. - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/9f8e3367-1ca4-4298-8545-285cdedfc991) - - -Nvidia RTX 4090 has the best performance for performance per accelerator, and this accelerator is assembled on a PC made by PCSPECIALIST UK. 
The below graph compares the performance per accelerator of bert-99 model among the top 10 performing systems. - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c02120cb-eda9-4eef-9e22-56fff4bf23a7) - - -Nvidia RTX 4090 wins the latency metric too for ResNet50, Bert and 3d-unet in the SingleStream scenario. -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/6d4b39a0-9f39-474a-ac16-5498e281ebad) - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/8afb5609-581d-4ee8-be56-731af731f10f) - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/5cb88f53-9255-4a0b-98df-a192ba87b125) - - -## Best energy efficient results in Edge category - -For the Speech Recognition model rnnt, CTuning submitted the best power-efficient result on Nvidia Jetson Orin AGX. - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d485aa50-a0d4-4a40-a805-cc2ddc3e0ca6) - - -For the Medical Imaging model 3d-unet where the samples per second is quite low, the best 4 energy efficient results are by CTuning. - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d15297fb-3eff-47c9-b188-68d438b7f248) - -For the Language Processing model bert-99, gloria highend system from Qualcomm tops the energy efficiency metric and CTuning's Nvidia Jetson Orin AGX is at second place. - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/22c85404-51f5-44b7-b128-8df4579c635c) - - - -## Benchmarking Rigel Supercomputer - -Rigel Edge Supercomputer from One Stop Systems wins the peak performance for all four submitted models and comfortably beats the second-place system. It also wins the best latency for ResNet50 MultiStream scenario. 
- - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/635f5f29-080f-4c7c-85a5-65fcf438f9e1) - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c993c2f5-a8b7-4a11-b89f-35d96e357e42) - - - - - -## Benchmarking MLPerf Inference Reference Implementations - -We compared the performance of the reference implementation with that of the Nvidia optimized implementation by running both implementations on an Nvidia RTX 4090 GPU. Reference implementation uses fp32 models whereas Nvidia implementation uses quantized models. - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/b46bc509-f242-4bc6-a9e8-ec318d09616b) - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/404b54d2-a04e-4e5e-861d-43c7d940faf8) - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/f5a04e85-269f-485a-8839-348dddcd5eb7) - -## Showcasing Apple Metal Performance - -We benchmarked the performance of Apple metal using Tensorflow-metal. The below graphs show the performance benefit of running inference on Apple meta using tensorflow-metal versus onnxruntime running only on CPUs. - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/87385e24-b3b5-4694-8106-2c30eeb393de) - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/c9a38dc9-0986-461e-b81d-988297e1771e) - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/4b8565b4-7a23-4f29-b450-6eaf00d10f63) - - - - - -## Design Space Exploration For NeuralMagic Deepsparse Library - -Using CM experiment automation we did a design space exploration to find the optimal batch size for the bert-99 compatible sparse models. 
- -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/a18088f2-c864-4c16-b714-5b375cf5fc94) - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/8bd95c5f-344f-4d9f-9f94-c3024efbce13) - - -## Comparing the performance of Modular MLPerf Inference C++ implementations - -Here we compare the performance of MIL Library used by CTuning and the KILT library used by KRAI both on CPUs and GPUs. This is not an apple-to-apple comparison as KILT used Nvidia Nvidia A1000 GPU and MIL was run on Nvidia RTX 4090 GPU. For CPUs, KILT was run on a [24-core Dell server](https://github.com/mlcommons/inference_results_v3.1/blob/main/closed/Krai/systems/7920t-kilt-onnxruntime_cpu.json) with peak frequency of 4000 MHz whereas MIL was run on a [16 core PCSPECIALIST custom workstation](https://github.com/mlcommons/inference_results_v3.1/blob/main/closed/CTuning/systems/amd_ryzen_workstation-cpp-cpu-onnxruntime-vdefault-default_config.json) with peak frequency of 5900 MHz. - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/6d73360a-27ab-4158-b4cc-a5724d6d4c73) - -![image](https://github.com/ctuning/mlcommons-ck/assets/4791823/d6b5516b-4861-4355-badf-65decbf8d3b0) - diff --git a/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json b/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json deleted file mode 100644 index 4860af17b2..0000000000 --- a/report/mlperf-inference-v3.1-analysis-ctuning/_cm.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "alias": "mlperf-inference-v3.1-analysis-ctuning", - "automation_alias": "report", - "automation_uid": "6462ecdba2054467", - "date":"20230917", - "title":"cTuning's analysis of MLPerf inference v3.1 community results", - "tags": [ - "mlperf", - "inference", - "mlperf-inference", - "v3.1", - "analysis", - "ctuning" - ], - "uid": "ebc483653dbc45b6" -} diff --git a/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json b/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json deleted file mode 100644 index 
99d0370a50..0000000000 --- a/report/mlperf-inference-v3.1-press-release-ctuning/_cm.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "alias": "mlperf-inference-v3.1-press-release-ctuning", - "automation_alias": "report", - "automation_uid": "6462ecdba2054467", - "date": "20230913", - "redirect": "https://www.linkedin.com/pulse/new-milestone-make-mlperf-benchmarks-accessible-everyone-fursin", - "tags": [ - "mlperf", - "inference", - "mlperf-inference", - "v3.1", - "analysis", - "ctuning" - ], - "title": "cTuning press-release about making MLPerf inference accessible to everyone", - "uid": "85ff4a6ac203411e" -} diff --git a/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json b/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json deleted file mode 100644 index 159a986735..0000000000 --- a/report/mlperf-inference-v3.1-press-release-hpcwire/_cm.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "alias": "mlperf-inference-v3.1-press-release-hpcwire", - "automation_alias": "report", - "automation_uid": "6462ecdba2054467", - "date": "20230913", - "tags": [ - "mlperf", - "inference", - "mlperf-inference", - "v3.1", - "analysis", - "ctuning" - ], - "redirect": "https://www.hpcwire.com/2023/09/13/mlperf-releases-latest-inference-results-and-new-storage-benchmark", - "title": "HPCWire about MLPerf inference v3.1 and storage results (with cTuning/cKnowledge coverage)", - "uid": "50960565640142d6" -} diff --git a/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json b/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json deleted file mode 100644 index 15c3fa6c42..0000000000 --- a/report/mlperf-inference-v4.0-press-release-ctuning/_cm.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "alias": "mlperf-inference-v4.0-press-release-ctuning", - "automation_alias": "report", - "automation_uid": "6462ecdba2054467", - "date": "20230913", - "redirect": "https://www.linkedin.com/pulse/new-cm-mlperf-automation-helps-benchmark-commodity-hardware-fursin-61noe", - "tags": [ - "mlperf", - 
"inference", - "mlperf-inference", - "v4.0", - "analysis", - "ctuning" - ], - "title": "cTuning press-release about a new version of the CM workflow to automate MLPerf", - "uid": "acc35b8e9ed14c98" -} diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 6e95a00827..ffb4a26b86 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -66,6 +66,8 @@ input_mapping: tp_size: CM_NVIDIA_TP_SIZE use_dataset_from_host: CM_USE_DATASET_FROM_HOST +predeps: False + # Duplicate CM environment variables to the ones used in native apps env_key_mappings: CM_HOST_: HOST_ diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index cf390bc3ab..5b9d4b1512 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -29,6 +29,7 @@ tags: - reference tags_help: "run-mlperf,inference" +predeps: False default_env: CM_MLPERF_IMPLEMENTATION: reference diff --git a/tests/script/check.py b/tests/script/check.py deleted file mode 100644 index dd030c3bb8..0000000000 --- a/tests/script/check.py +++ /dev/null @@ -1,27 +0,0 @@ -def check_return(r): - if 'return' not in r: - raise Exception( - 'CM access function should always return key \'return\'!') - if 'error' in r: - raise Exception(r['error']) - - -def check_list(r, string, found=True): - check_return(r) - if 'list' not in r: - raise Exception('CM search should return a list!') - if len(r['list']) < 1 and found: - raise Exception('CM search returned an empty list for ' + string) - if len(r['list']) > 0 and not found: - raise Exception('CM search returned at lease one entry for ' + string) - - -def check_key_value(d, key, value, absent_ok=False): - if not d.get(key): - if absent_ok: - return True - else: - raise Exception(f"{key} is missing. Current values are {d}") - elif d[key] != value: - raise Exception( - f"{key} is not having the expected value of {value}. 
Current value is {d[key]}") diff --git a/tests/script/process_dockerfile.py b/tests/script/process_dockerfile.py deleted file mode 100644 index d9abd2abeb..0000000000 --- a/tests/script/process_dockerfile.py +++ /dev/null @@ -1,33 +0,0 @@ -import sys -import os -import cmind as cm -import check as checks -import json -import yaml - -files = sys.argv[1:] - -for file in files: - if not os.path.isfile(file): - continue - if not file.endswith("_cm.json") and not file.endswith("_cm.yaml"): - continue - if not file.startswith(os.path.join("cm-mlops", "script")): - continue - - script_path = os.path.dirname(file) - - f = open(file) - - if file.endswith(".json"): - data = json.load(f) - elif file.endswith(".yaml"): - data = yaml.safe_load(f) - - uid = data['uid'] - - r = cm.access({'action': 'dockerfile', - 'automation': 'script', - 'artifact': uid, - 'quiet': 'yes'}) - checks.check_return(r) diff --git a/tests/script/process_readme.py b/tests/script/process_readme.py deleted file mode 100644 index de7e04033e..0000000000 --- a/tests/script/process_readme.py +++ /dev/null @@ -1,27 +0,0 @@ -import sys -import os -import cmind as cm -import check as checks -import json -import yaml - -files = sys.argv[1:] - -for file in files: - if not os.path.isfile(file): - continue - if not file.endswith("_cm.json") and not file.endswith("_cm.yaml"): - continue - if not file.startswith(os.path.join("cm-mlops", "script")): - continue - script_path = os.path.dirname(file) - f = open(file) - if file.endswith(".json"): - data = json.load(f) - elif file.endswith(".yaml"): - data = yaml.safe_load(f) - uid = data['uid'] - - r = cm.access({'action': 'doc', 'automation': 'script', - 'artifact': uid, 'quiet': 'yes'}) - checks.check_return(r) diff --git a/tests/script/process_tests.py b/tests/script/process_tests.py deleted file mode 100644 index 8012d097b6..0000000000 --- a/tests/script/process_tests.py +++ /dev/null @@ -1,38 +0,0 @@ -import sys -import os -import cmind as cm -import check as checks 
-import json -import yaml - -files = sys.argv[1:] - -for file in files: - print(file) - if not os.path.isfile(file) or not "script" in file: - continue - if not file.endswith("_cm.json") and not file.endswith("_cm.yaml"): - continue - script_path = os.path.dirname(file) - f = open(file) - if file.endswith(".json"): - data = json.load(f) - elif file.endswith(".yaml"): - data = yaml.safe_load(f) - if data.get('uid', '') == '': - continue # not a CM script meta - uid = data['uid'] - - ii = { - 'action': 'test', 'automation': 'script', 'artifact': uid, 'quiet': 'yes', 'out': 'con' - } - if os.environ.get('DOCKER_CM_REPO', '') != '': - ii['docker_cm_repo'] = os.environ['DOCKER_CM_REPO'] - if os.environ.get('DOCKER_CM_REPO_BRANCH', '') != '': - ii['docker_cm_repo_branch'] = os.environ['DOCKER_CM_REPO_BRANCH'] - if os.environ.get('TEST_INPUT_INDEX', '') != '': - ii['test_input_index'] = os.environ['TEST_INPUT_INDEX'] - print(ii) - r = cm.access(ii) - - checks.check_return(r) diff --git a/tests/script/test_deps.py b/tests/script/test_deps.py deleted file mode 100644 index 37d75fd4c4..0000000000 --- a/tests/script/test_deps.py +++ /dev/null @@ -1,25 +0,0 @@ -# This test covers version, variation, compilation from src, add_deps, -# add_deps_recursive, deps, post_deps - -import cmind as cm -import check as checks - -# MLPerf v3.0 inference is now very outdated and we are testing inference -# in separate tests - -# r = cm.access({'action':'run', 'automation':'script', 'tags': 'generate-run-cmds,mlperf', 'adr': -# {'loadgen': {'version': 'r3.0'}, 'compiler': {'tags': "gcc"}}, 'env': {'CM_MODEL': 'resnet50', -# 'CM_DEVICE': 'cpu', 'CM_BACKEND': 'onnxruntime'}, 'quiet': 'yes'}) -# checks.check_return(r) -# -# r = cm.access({'action':'search', 'automation': 'cache', 'tags': 'loadgen,version-r3.0,deps-python-non-virtual'}) -# checks.check_list(r, "loadgen,version-r3.0,deps-python-non-virtual") -# -# r = cm.access({'action':'search', 'automation': 'cache', 'tags': 
'inference,src,version-r3.0'}) -# checks.check_list(r, "inference,src,version-r3.0") -# -# r = cm.access({'action':'run', 'automation':'script', 'tags': 'app,mlperf,inference,generic,_python,_resnet50,_onnxruntime,_cpu,_r3.0_default', 'adr': {'mlperf-implementation': { 'version': 'master'}}, 'quiet': 'yes'}) -# checks.check_return(r) -# -# r = cm.access({'action':'run', 'automation':'script', 'tags': 'app,mlperf,inference,generic,_python,_resnet50,_tf,_cpu,_r3.0_default', 'adr': {'mlperf-implementation': { 'version': 'master'}}, 'quiet': 'yes'}) -# checks.check_return(r) diff --git a/tests/script/test_docker.py b/tests/script/test_docker.py deleted file mode 100644 index 991ef04030..0000000000 --- a/tests/script/test_docker.py +++ /dev/null @@ -1,23 +0,0 @@ -# This test covers version, variation, compilation from src, -# add_deps_recursive, post_deps - -import cmind as cm -import check as checks - -r = cm.access({'action': 'run', - 'automation': 'script', - 'tags': 'run,docker,container', - 'add_deps_recursive': { - 'compiler': {'tags': "gcc"} - }, - 'image_name': 'cm-script-app-image-classification-onnx-py', - 'env': { - 'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python', - 'CM_MLOPS_REPO': 'mlcommons@cm4mlops', - 'CM_MLOPS_REPO_BRANCH': 'mlperf-inference', - 'CM_DOCKER_IMAGE_BASE': 'ubuntu:22.04' - }, - 'quiet': 'yes' - }) - -checks.check_return(r) diff --git a/tests/script/test_features.py b/tests/script/test_features.py deleted file mode 100644 index b29ee6a7a5..0000000000 --- a/tests/script/test_features.py +++ /dev/null @@ -1,38 +0,0 @@ -# This test covers -# 1. python-virtual-env and update_deps inside customize.py -# 2. 
cache search using "-" prefix - -import cmind as cm -import check as checks - -r = cm.access({'action': 'run', - 'automation': 'script', - 'tags': 'install,python-venv', - 'name': 'test', - 'quiet': 'yes'}) -checks.check_return(r) - -r = cm.access({'action': 'search', 'automation': 'cache', - 'tags': 'get,python,virtual,name-test'}) -checks.check_list(r, "get,python-venv") - -r = cm.access({'action': 'run', - 'automation': 'script', - 'tags': 'get,dataset,preprocessed,imagenet,_NHWC', - 'quiet': 'yes'}) -checks.check_return(r) - -r = cm.access({'action': 'search', 'automation': 'cache', - 'tags': 'get,dataset,preprocessed,imagenet,-_NCHW'}) -checks.check_list(r, "_NHWC") - -r = cm.access({'action': 'search', 'automation': 'cache', - 'tags': 'get,dataset,preprocessed,imagenet,-_NHWC'}) -# checks.check_list(r, "-_NHWC", False) - - -r = cm.access({'action': 'run', 'automation': 'script', - 'tags': 'test-scripts,_v1,_v2'}) -new_env = r['new_env'] -checks.check_key_value(new_env, "CM_VAR1", "combv1v2") -checks.check_key_value(new_env, "CM_VAR2", "constv2") diff --git a/tests/script/test_install.py b/tests/script/test_install.py deleted file mode 100644 index d4fb93ec70..0000000000 --- a/tests/script/test_install.py +++ /dev/null @@ -1,15 +0,0 @@ -# This test covers script installation, version, shared library install - -import cmind as cm -import check as checks - -r = cm.access({'action': 'run', - 'automation': 'script', - 'tags': 'python,src,install,_shared', - 'version': '3.9.10', - 'quiet': 'true'}) -checks.check_return(r) - -r = cm.access({'action': 'search', 'automation': 'cache', - 'tags': 'python,src,install,_shared,version-3.9.10'}) -checks.check_list(r, "python,src,install,_shared,version-3.9.10") diff --git a/tests/test_cm.py b/tests/test_cm.py deleted file mode 100644 index 821e1571d6..0000000000 --- a/tests/test_cm.py +++ /dev/null @@ -1,17 +0,0 @@ -try: - import cmind as cm - - r = cm.access(['test', 'script']) - if 'return' not in r: - raise Exception( - 
'CM access function should always return key \'return\'!') - exit(0) - -except ImportError as e: - from sys import stderr - from subprocess import call - print( - 'WARNING: CM module for python is not installed & jupyter notebooks will not be supported', - file=stderr) - retcode = call(['cm', 'test', 'script']) - exit(retcode) diff --git a/tests/test_search_speed.py b/tests/test_search_speed.py deleted file mode 100644 index 577c4f0b80..0000000000 --- a/tests/test_search_speed.py +++ /dev/null @@ -1,26 +0,0 @@ -import cmind as cm -import time - -times = [] - -steps = 10 - -print('Running search with tags {} times ...'.format(steps)) - -for step in range(steps): - - start = time.time() - r = cm.access({'action': 'search', - 'automation': 'script', - 'tags': 'detect,os'}) - timer = time.time() - start - - if r['return'] > 0: - cm.error(r) - - times.append(timer) - -step = 0 -for t in times: - step += 1 - print("{}) {:0.3f} sec.".format(step, t)) diff --git a/tests/tutorials/test_tutorial_retinanet.py b/tests/tutorials/test_tutorial_retinanet.py deleted file mode 100644 index dcca78f205..0000000000 --- a/tests/tutorials/test_tutorial_retinanet.py +++ /dev/null @@ -1,37 +0,0 @@ -# This test covers version, variation, compilation from src, add_deps, -# add_deps_recursive, deps, post_deps - -import check as checks -import cmind as cm -from pathlib import Path -import sys -import os - -sys.path.insert( - 1, - os.path.join( - Path(__file__).parent.parent.resolve(), - "script")) - -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'app,mlperf,inference,generic,_cpp,_retinanet,_onnxruntime,_cpu', 'adr': - {'python': {'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'scenario': 'Offline', - 'mode': 'accuracy', 'test_query_count': '10', 'rerun': 'true', 'quiet': 'yes'}) -checks.check_return(r) - -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 
'app,mlperf,inference,generic,_cpp,_retinanet,_onnxruntime,_cpu', 'adr': - {'python': {'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'scenario': 'Offline', - 'mode': 'performance', 'test_query_count': '10', 'rerun': 'true', 'quiet': 'yes'}) -checks.check_return(r) - -r = cm.access({'action': 'run', - 'automation': 'script', - 'tags': 'install,python-venv', - 'version': '3.10.8', - 'name': 'mlperf'}) -checks.check_return(r) - -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': - {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'submitter': 'Community', - 'implementation': 'cpp', 'hw_name': 'default', 'model': 'retinanet', 'backend': 'onnxruntime', 'device': 'cpu', 'scenario': 'Offline', - 'test_query_count': '10', 'clean': 'true', 'quiet': 'yes'}) -checks.check_return(r) diff --git a/tests/tutorials/test_tutorial_tvm.py b/tests/tutorials/test_tutorial_tvm.py deleted file mode 100644 index 6901a31693..0000000000 --- a/tests/tutorials/test_tutorial_tvm.py +++ /dev/null @@ -1,28 +0,0 @@ -# This test covers version, variation, compilation from src, add_deps, -# add_deps_recursive, deps, post_deps - -import check as checks -import cmind as cm - -from pathlib import Path -import sys -import os - -sys.path.insert( - 1, - os.path.join( - Path(__file__).parent.parent.resolve(), - "script")) - -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds', 'adr': - {'python': {'name': 'mlperf', 'version_min': '3.8'}}, 'submitter': 'Community', - 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', - 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) -checks.check_return(r) - - -r = cm.access({'action': 'run', 
'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission', 'adr': - {'python': {'name': 'mlperf', 'version_min': '3.8'}}, 'submitter': 'Community', - 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', - 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'}) -checks.check_return(r) diff --git a/tests/tutorials/test_tutorial_tvm_pip_ge.py b/tests/tutorials/test_tutorial_tvm_pip_ge.py deleted file mode 100644 index f95521f440..0000000000 --- a/tests/tutorials/test_tutorial_tvm_pip_ge.py +++ /dev/null @@ -1,26 +0,0 @@ -import check as checks -import cmind as cm - -from pathlib import Path -import sys -import os - -sys.path.insert( - 1, - os.path.join( - Path(__file__).parent.parent.resolve(), - "script")) - -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds', 'adr': - {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': { - 'tags': '_pip-install'}, 'tvm-model': {'tags': '_graph_executor'}}, - 'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', - 'device': 'cpu', 'scenario': 'Offline', 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) -checks.check_return(r) - -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': - {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': { - 'tags': '_pip-install'}, 'tvm-model': {'tags': '_graph_executor'}}, - 'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', - 'device': 'cpu', 'scenario': 'Offline', 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'}) -checks.check_return(r) diff --git a/tests/tutorials/test_tutorial_tvm_pip_vm.py b/tests/tutorials/test_tutorial_tvm_pip_vm.py deleted file mode 100644 index 
ab9244bf01..0000000000 --- a/tests/tutorials/test_tutorial_tvm_pip_vm.py +++ /dev/null @@ -1,27 +0,0 @@ -# This test covers version, variation, compilation from src, add_deps, -# add_deps_recursive, deps, post_deps - -import check as checks -import cmind as cm - -from pathlib import Path -import sys -import os - -sys.path.insert( - 1, - os.path.join( - Path(__file__).parent.parent.resolve(), - "script")) - -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds', 'adr': - {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {'tags': '_pip-install'}}, 'submitter': 'Community', - 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', - 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) -checks.check_return(r) - -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': - {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {'tags': '_pip-install'}}, 'submitter': 'Community', - 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', - 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'}) -checks.check_return(r)