diff --git a/.github/workflows/test-rnnt.yml b/.github/workflows/test-rnnt.yml old mode 100755 new mode 100644 diff --git a/.github/workflows/test-submission-checker.yml b/.github/workflows/test-submission-checker.yml new file mode 100644 index 000000000..26322d858 --- /dev/null +++ b/.github/workflows/test-submission-checker.yml @@ -0,0 +1,36 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Test for MLPerf inference submission checker using CM script automation + +on: + pull_request: + branches: [ "master", "dev" ] + paths: + - tools/submission/** + - .github/workflows/test-submission-checker.yml + - '!**.md' + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.8", "3.11", "3.12" ] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install cmind + cm pull repo mlcommons@ck + git clone https://github.com/mlcommons/inference_results_v4.0 + - name: Test MLPerf inference submission checker + run: | + cm run script --tags=run,mlperf,inference,submission,checker --input=`pwd`/inference_results_v4.0 --quiet diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/DATASETS_MODELS.md b/retired_benchmarks/never_adopted/language/gpt3/megatron/DATASETS_MODELS.md new file mode 100644 index 000000000..6cf9edae1 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/DATASETS_MODELS.md @@ -0,0 +1,8 @@ +# Datasets + +This is a comprehensive list of public datasets and models used by this repository. + +| Name (Link/Source) | Framework | Use Case | +|--------------------| --------- | -------- | +| [cnn_dailymail (Hugging Face)](https://huggingface.co/datasets/cnn_dailymail) | PyTorch | Text Summarization | +| [gpt-3-175b (Megatron implementation)](https://github.com/mlcommons/training/tree/master/large_language_model/megatron-lm) | PyTorch | Text Summarization | diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/LICENSE b/retired_benchmarks/never_adopted/language/gpt3/megatron/LICENSE new file mode 100644 index 000000000..25ae41106 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/README.md b/retired_benchmarks/never_adopted/language/gpt3/megatron/README.md
new file mode 100644
index 000000000..23900f715
--- /dev/null
+++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/README.md
@@ -0,0 +1,139 @@
+# GPT-3 Reference Implementation
+
+## Setup Instructions
+
+```bash
+WORK_DIR=$PWD
+# Create Environment (conda)
+conda create -n llm python=3.9 -y
+conda activate llm
+```
+
+### Download repositories
+```bash
+cd $HOME
+git clone --recurse-submodules https://github.com/mlcommons/inference.git --depth 1
+git clone https://github.com/NVIDIA/apex.git
+git clone https://github.com/mlcommons/training.git --branch inference-megatron
+cd $HOME/apex
+git checkout -b language 2d8302a6c12e202f7b40b13a43daa95f326fd0ea
+```
+
+
+### Install requirements
+```bash
+pip install torch==1.13.0 torchvision==0.14.0 datasets evaluate accelerate simplejson nltk rouge_score pybind11 Ninja numpy==1.19.5 sentencepiece zarr tensorstore
+pip install git+https://github.com/NVIDIA/mlperf-common.git
+pip install git+https://github.com/mlperf/logging.git
+sudo apt install pybind11-dev
+```
+
+#### Install apex
+For `pip >= 23.1`
+```bash
+cd $HOME/apex
+pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
+```
+Otherwise
+```bash
+cd $HOME/apex
+pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --global-option="--cpp_ext" --global-option="--cuda_ext" ./
+```
+**Warning:** Make sure the NVIDIA driver version and PyTorch's CUDA version match.
+
+This step takes several minutes. You can cache this step by building a wheel instead:
+For `pip >= 23.1`
+```bash
+cd $HOME/apex
+pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
+```
+Otherwise
+```bash
+cd $HOME/apex
+pip wheel -v --disable-pip-version-check --no-cache-dir --no-build-isolation --global-option="--cpp_ext" --global-option="--cuda_ext" ./
+```
+After that, you can store the resulting .whl file and simply run
+```bash
+pip install <path-to-apex-wheel>.whl
+```
+
+### Build Loadgen
+```sh
+cd $HOME/inference/loadgen
+CFLAGS="-std=c++14 -O3" python setup.py develop
+```
+
+
+### Download & Process Dataset
+Downloads the raw data, processes it, and saves it as a JSON file inside `data/`:
+```bash
+cd $HOME/inference/language/gpt3/megatron
+python download_cnndm.py
+```
+### Calibration
+Downloads the CNN-DailyMail dataset and creates the calibration dataset (JSON) for post-training quantization:
+```bash
+cd $HOME/inference/language/gpt3/megatron
+pip install datasets
+python prepare-calibration.py --calibration-list-file calibration-list.txt --output-dir <output-directory>
+```
+### Download tokenizer files
+TODO: Share tokenizer links
+
+Temporary private link:
+```bash
+cd $HOME/inference/language/gpt3/megatron/data/
+gsutil cp gs://mlperf-llm-public2/vocab/c4_en_301_5Mexp2_spm.model .
+```
+### Download GPT-3 model
+TODO: Share checkpoint link
+
+Temporary private link:
+```bash
+cd $HOME/inference/language/gpt3/megatron/
+mkdir model
+cd $HOME/inference/language/gpt3/megatron/model/
+gcloud auth login
+# gcloud storage cp "gs://mlperf-llm-public2/nv_gpt3ckpt_00011000_megatron_06162023/language_model*" .
+gsutil -m rsync -r "gs://mlperf-llm-public2/nv_gpt3ckpt_00011000_megatron_06162023/" .
+gsutil cp gs://mlperf-llm-public2/nv_gpt3ckpt_00011000_megatron_06162023/metadata.json .
+```
+### Running the Benchmark - Megatron
+First set the `MEGATRON_PATH` environment variable:
+```bash
+export MEGATRON_PATH=$HOME/training/large_language_model/megatron-lm
+```
+In one terminal, run the text generation server. This requires 8 GPUs:
+```bash
+cd $HOME/inference/language/gpt3/megatron/
+./run_generation_server.sh
+```
+You can make a debug run with one GPU:
+```bash
+cd $HOME/inference/language/gpt3/megatron/
+./run_generation_server_debug.sh
+```
+
+In another terminal, run the benchmark. This will query the generation server each time a query is issued to the SUT:
+```bash
+cd $HOME/inference/language/gpt3/megatron/
+python main.py --scenario=[Offline | Server | SingleStream] --model-path=./model/ --dataset-path=./data/cnn_eval.json [--accuracy] --max_examples=[Maximum number of examples to consider]
+```
+### Evaluate accuracy run
+Evaluates the ROUGE scores from the accuracy logs. Only applicable when specifying [--accuracy] while running main.py:
+```bash
+pip install rouge_score
+python evaluation.py --mlperf-accuracy-file ./build/logs/mlperf_log_accuracy.json --dataset-file ./data/cnn_eval.json
+```

+### Reference Model - ROUGE scores
+TODO: Compute ROUGE scores
+
+### License:
+Apache License Version 2.0.
+
+### Datasets & Models:
+
+To the extent that any data, datasets or models are referenced by Intel or accessed using tools or code on this site such data, datasets and models are provided by the third party indicated as the source of such content. Intel does not create the data, datasets, or models, provide a license to any third-party data, datasets, or models referenced, and does not warrant their accuracy or quality. By accessing such data, dataset(s) or model(s) you agree to the terms associated with that content and that your use complies with the applicable license. DATASETS [https://github.com/badhri-intel/inference/blob/gpt-j/ref_implementation/language/gpt-j/DATASETS_MODELS.md]
+
+Intel expressly disclaims the accuracy, adequacy, or completeness of any data, datasets or models, and is not liable for any errors, omissions, or defects in such content, or for any reliance thereon. Intel also expressly disclaims any warranty of non-infringement with respect to such data, dataset(s), or model(s). Intel is not liable for any liability or damages relating to your use of such data, datasets or models.
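Editor's note: the SUT in `backend.py` (next file in this diff) does not run the model itself; it forwards each LoadGen query to the Megatron text generation server over HTTP. The sketch below reproduces that request outside of LoadGen, which can be handy for checking that the server started by `run_generation_server.sh` is reachable before launching a full run. The URL, headers, HTTP method, payload keys, and the `"output"` response field are taken from `backend.py`; the token IDs and the exact shape of `input_ids`/`input_length` are placeholder assumptions, since the real values come from the `Dataset` preprocessing in `dataset.py`.

```python
# Minimal sketch, assuming the generation server is already listening on
# localhost:5000 (see run_generation_server.sh). Payload mirrors backend.py.
import json
import requests

url = "http://localhost:5000/api"
headers = {"Content-Type": "application/json"}
data = {
    "input_ids": [[27, 91, 437, 1659, 5239, 91, 29]],  # placeholder token IDs
    "input_length": [7],                               # placeholder length
}

response = requests.put(url, data=json.dumps(data), headers=headers)
if response.status_code == 200:
    # The server returns the generated token IDs under "output".
    print(response.json()["output"])
else:
    print("Server returned status", response.status_code)
```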
diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/backend.py b/retired_benchmarks/never_adopted/language/gpt3/megatron/backend.py new file mode 100644 index 000000000..39605cd7b --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/backend.py @@ -0,0 +1,186 @@ +import array +import torch +import requests +import json +import os +import sys +import numpy as np + +import mlperf_loadgen as lg +from dataset import Dataset + + +class SUT_base: + def __init__( + self, + dataset_path, + max_examples, + args, + ): + # TODO : Pass model file name to init instead of args + print("Loading PyTorch model...") + self.model_name = "Megatron-LM" + self.dataset_path = dataset_path + self.url = "http://localhost:5000/api" + self.headers = {"Content-Type": "application/json"} + + self.data_object = Dataset( + self.dataset_path, total_count_override=max_examples, args=args + ) + + self.qsl = lg.ConstructQSL( + self.data_object.count, + self.data_object.perf_count, + self.data_object.LoadSamplesToRam, + self.data_object.UnloadSamplesFromRam, + ) + + self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries) + + def issue_queries(self, query_samples): + print("Number of Samples in query_samples : ", len(query_samples)) + + for i in range(len(query_samples)): + index = query_samples[i].index + input_ids_tensor = self.data_object.source_encoded_input_ids[index] + # input_masks_tensor = self.data_object.source_encoded_attn_masks[index] + input_length_tensor = self.data_object.source_encoded_input_id_lengths[ + index + ] + + pred_output_batch = self.inference_call( + input_ids_tensor, input_length_tensor + ) + + response_array = array.array("B", pred_output_batch[0].tobytes()) + bi = response_array.buffer_info() + response = [lg.QuerySampleResponse(query_samples[i].id, bi[0], bi[1])] + lg.QuerySamplesComplete(response) + if i % 5 == 0: + print("Completed : ", i) + + def inference_call(self, input_ids_tensor, input_length_tensor): + """Common for all scenarios""" + data = {"input_ids": input_ids_tensor, "input_length": input_length_tensor} + response = requests.put(self.url, data=json.dumps(data), headers=self.headers) + if response.status_code != 200: + # TODO: Manage exeption + return None + else: + output = response.json()["output"] + output = np.asarray(output) + return output + + def flush_queries(self): + pass + + def __del__(self): + print("Finished destroying SUT.") + + +class SUT_Offline(SUT_base): + def __init__( + self, dataset_path, max_examples, args, + ): + SUT_base.__init__( + self, + dataset_path, + max_examples, + args, + ) + + """IssueQuery and inference methods implemented in Base class""" + + +class SUT_Server(SUT_base): + def __init__( + self, dataset_path, max_examples, args, + ): + + SUT_base.__init__( + self, + dataset_path, + max_examples, + args, + ) + self.total_samples_done = 0 + self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries) + print("SUT Server") + + def issue_queries(self, query_samples): + + index = query_samples[0].index + input_ids_tensor = self.data_object.source_encoded_input_ids[index] + # input_masks_tensor = self.data_object.source_encoded_attn_masks[index] + input_length_tensor = self.data_object.source_encoded_input_id_lengths[index] + + pred_output_batch = ( + self.inference_call(input_ids_tensor, input_length_tensor) + ) + + response_array = array.array("B", pred_output_batch[0].tobytes()) + bi = response_array.buffer_info() + responses = [lg.QuerySampleResponse(query_samples[0].id, bi[0], bi[1])] + 
lg.QuerySamplesComplete(responses) + self.total_samples_done += 1 + if self.total_samples_done % 5 == 0: + print("Completed : ", self.total_samples_done) + + +class SUT_SingleStream(SUT_base): + def __init__( + self, dataset_path, max_examples, args, + ): + SUT_base.__init__( + self, + dataset_path, + max_examples, + args, + ) + self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries) + self.total_samples_done = 0 + + def issue_queries(self, query_samples): + + index = query_samples[0].index + input_ids_tensor = self.data_object.source_encoded_input_ids[index] + # input_masks_tensor = self.data_object.source_encoded_attn_masks[index] + input_length_tensor = self.data_object.source_encoded_input_id_lengths[index] + + pred_output_batch = ( + self.inference_call(input_ids_tensor, input_length_tensor) + ) + + response_array = array.array("B", pred_output_batch[0].tobytes()) + bi = response_array.buffer_info() + responses = [lg.QuerySampleResponse(query_samples[0].id, bi[0], bi[1])] + lg.QuerySamplesComplete(responses) + self.total_samples_done += 1 + if self.total_samples_done % 5 == 0: + print("Completed : ", self.total_samples_done) + + +def get_SUT( + scenario, + dataset_path, + max_examples, + args, +): + if scenario == "Offline": + return SUT_Offline( + dataset_path, + max_examples, + args, + ) + elif scenario == "Server": + return SUT_Server( + dataset_path, + max_examples, + args, + ) + elif scenario == "SingleStream": + return SUT_SingleStream( + dataset_path, + max_examples, + args, + ) diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/calibration-list.txt b/retired_benchmarks/never_adopted/language/gpt3/megatron/calibration-list.txt new file mode 100644 index 000000000..fc5d25884 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/calibration-list.txt @@ -0,0 +1,1000 @@ +eceaa658027ad9625f832368198e11bd2fa38977 +70052e55c12c97a9bf6796a25b6ced8f3ec4be06 +9767fdf0a53da6ee9e2f75582cac5964d80e1b5d +1f8c736647d06c42beb553b25a02e44ca15ca0fb +d3ce7d615ecc15f094d8130654812ad77cd604a3 +55086c3f69cb41b991d3db0c6b10b0aa374788b4 +2745f93afca3edf25dd9ccfd094eef06298f62cb +343644770a597a2dfa7548ba165fa9c6bdc88245 +e2cecb8734918ac6a2d9cc8afcfafb16b1781ae2 +feba32aa9aa3b51fb451bc48a54e78d02efe977f +9c2e4d2f6085ef9f237e6fe1baf83000a264cf93 +d85158494b7041560466f153c4d050362f90a7e6 +1e14852c49e84434ca249951e0fe603610eb68f6 +369d721d1102f0cad726ad3426d79c965a224b28 +b9898d6014353a7411c0cec222996431c832c35f +7cbe104b3203061bb544267879fa316436a1ab5f +f48a6b4fa0827b4c6324bd47dc2e8954141b1a6a +acb5ce76c230bc66633414678bf254387c3d6c85 +d70d5115ec3adc5fb3aee8b1e29c7f0f2db083be +ffbe89f592457d39ab9c28de4fd89fbac2150f81 +d841808ba87a4aabbfe4427c53eb0e2e8a74995c +2d4125c6162f9b4924a262a55bd8fe3faad0b3c7 +95fbe3b3a7e5fb6fa48289885df25d9a6e911d2d +f6ffa98e7d46000bee325e5284a2ac897ba4149d +31e424f7a6fe1f5ec61486eec17a85c78ca2ce8c +2165fd490b9d14ce0cd3784beb2f4a1d10028a1d +4d6b1a85d264397e956c0a03f70de123ed4fff5f +4d20111e71a06547613e161292a51cc44eb74da0 +b90b35dfde9fc08fe1651e36bc02a3f1503e3b6e +2d3b2eb21a6691c764aaa1f27030453fc44331ab +dbf02196bae40e0adbcd1790294748a560b1e45c +0ef5c715acd7a70f51a9d800c9e01bfe69657bed +f0f65f40fc56b69bbfab5d88a157dc52ad967484 +db3575fd124f65a7aeee7b4512b5e0fbebf2c8ea +1234fafb7b6ecc9224d6d979536115771467f4ae +c31f79babaf4d93609ef3ee2966e25428a4fc130 +600b619001a9d840f5bb5ee3362787ee38df26fd +5e68842017bc6f5806d962ae5ddea8490d763b82 +fa87334256af163f8f56d8035d1be4c7909147e9 +826f2414623f8e444725f1c9af593f27b440ebdc 
+3603896c0fbb49d6231ced092ff7d057db2c43f1 +4b8505e0219b78f9645fb12177d97b8e29821ee5 +3332226f8b4f6c46ed3c66ad0765c3720475254f +97223b7119e264a598afe67e0ba82fbb97fedd2b +87fd2fd13729ba13920591bcc96a83ddf61625e0 +2160c5d812611becf442b5079a7908c2f48f6de7 +559d3b10273acbd4b98ff5557aee92f33e50129d +273c1d2936592874fb599f39dce8d6a0813a49b3 +3e6affd8cc6ead338996574fe1d0cb03ca983ea2 +3b733db6e80a89bb4062c1e661b9f9d4690ea0c8 +4a0f219f4b67d9cda499b71e3ee54bff5736f8c1 +064396600b73dc4418ef783fc82f4fe1ff038f6d +eee1cbcef13cd16096734414da992aa1a7048bee +e190b373844d5d3a59b9abb867de8f0fdffddeda +8700aeab383d481d613759f19717ea4381df1433 +087b05a98112135d9fb1c7628fffb72ae2456e9e +f69c5a3c9ef4bfb195ad7ce2590021121a7afced +82958d258a7fe963f8c9a001010365437bf15fc2 +b6b37b9bc60519fd214d6e980fcbb16da066eb68 +a49ac163d47c1e690f5d3237433194a9d0ab558a +aa35fa6f613b29bf80806552b1bf260f04bbedc2 +c248fc3e54b451a4117f23f5adc88cb8067be3aa +f21eae7e796721088234b885bc6eae0daef05738 +b5c4d6f671adfb997abb53a5f2f54519180df7b5 +457b2ab2b4edb94c4b67c1219451be80dc369e8b +e80b0028e44685e39581ced42c1e1ed9cf44f74e +c2d90734f9228cf3163187ad72405c90bb09d13b +a999f5732f9bbe0991e6e895f9bfd42bdda75bf1 +cea6ac133923e62b186aeb17db37be6640a81200 +7facc85e37ababb8c029257f246fe0934f84a808 +21dcf444d4bec9d4b2a6ffb54112c4cbb797025f +f880779ad1b262aac438a8cf3a6df9c0ecebdada +7313410020b93dea1f223d2ffc0b997385d7886c +b0b2948eac6b4e082bbd420da8dff3de6a187377 +360b51c738cde3fa09cef18c3d7672a1d20d3379 +cdaa77a96e1d96a672548b6dc0bd83bffe6f1619 +cd2cb113b1cd90e2ad235466df3a64dfc956877a +7140dd21ed480a3f47a59c647c1f4e690939caf0 +f2c9d3d8f0622e67574f386197b92570730fa61c +010cd75a5b587285f7697cdb6db6526bcc0320b2 +86983b2cddcc91369ad7d4ff61c9e6d258c78b71 +bad5a939cc0d695a97e7f3fedac53c93f04c3253 +0b8cb37a8d54e1761b3d99b8a6e6f921f07e00ae +613111ac2a3831a7291656bee2def306453552d4 +0e29e9cf2a08c4b35ba840bde03b0537e3821a74 +2f425306562bf94bf5f4567b8c63c5b204a2c414 +8bdc6411954d7163b137c4970f6d6431aeeb9ee6 +be9ef18a4a0a08f94a0340b2df0d0f83144299b0 +cafdb2911d9af6038659354058586d6bd5174338 +0c14f1a16ad395dd9aff4437452cf555ae8858d2 +c3db5ce828128fc91c5bbf59d144e825f49ad271 +273221df42c8dd9e0296cf3ec203c59fa205ecd2 +5abe61623d56bbb0d7bbc79f5ffa96732b3a1d97 +98d6f13e4f3ed36591298430dcb49bddf89003b5 +18686c211cc5f466c48fb8dfaf5eaa00cf3ef0ee +d85b6f625e9e57efaa1e3c21e626dd25e3414758 +69e2ec5703156e86990ba690b2a9d4dd0f5733b0 +cc665a9316a9b22400183776d396940ffeea2fde +319a27f92609641ee0f0fe1f6a88a9aab739b98a +94817c53b53b3979c3d32d197ec26dc489921e6b +c803d0b957abefb26e1fe0ad87aea5fc80180a20 +6c5b9c68e7890f4b683e148ac62dace194b45b59 +b377a0f9dac858e2bf7271bac531591140a56e33 +6604ac30dd3d9a29faf40def3e4549feab4d9d02 +b7fc4ea36690353003cd172e597c456534bb2811 +b42286dd9b577bcc261e838045e51133266f7fce +73f9e1772ab372f36898dc95ef18711434827717 +838a481841bf69bbe3ac8a3b53da54d692ffd084 +0a1c17d84f846eecce39356447e4d556ffdd07fa +98f8a09b98daa05cd412e83d724e4b914e8d921f +f859c4b5c39653753b21c575a98d525fc47c16bf +2abf786570b9f9de945c70d9678cb67dd2a2e57e +f4a4d960ecfb87fe06e38f56310a14d23caddb42 +d96bcf6baf6cf8c61d54ad9c9abd997bab870077 +c6eaf9d97b059f3e824a1ab4ffdfe45494e5f8a1 +8bbbd3d05e22fc3372abdbb471799bd5f5380a75 +09dba9542ee64697d789d47ecd3bd53bc8b0d953 +7d42400c49f9cb313345f7d9ea74c51d0147a4f8 +1f58239bc2fa91c3787bda0c2b9ca5cffaa7510d +a0e187806f8ec8f02e0d329ac114ac44fe69a4b4 +ad2e90052e15364c93011386322f6a5007314348 +adba3dadc7c6a3177346ba9119a8df8f5b81ae0c +af2d712599be471d1ba0b91fa18c347220ca595d +77c610007679f13e1f5d314ddcf4b14c7e57876b 
+0b1d14b9f6619ced37003ff77f22dbd122fca645 +310fe57a8497cb5215c1d70d9f9a22ab91d5c054 +7056abbadea0a2eec779e157890219171bc98938 +3863aca32c99daf6ef8a0de6f60471bd9c54b885 +b32151c4d36a4b42e9b832e14f539396627f8eae +f72c81d6c240a6dc8395c6ef33b64edc423b1fae +4409a8f066166ba9aad02611f6979b44fc91afae +75474ebca7f3413b4e814794d6ffc13663120bae +55e430f0c68f6f0c4cc996fcb87c2e53233e2738 +5dc954a346ff3dba1629e03f3e6485235d6d4742 +95a92435cdb8f2b51ede4ce6220c5613c8dbfc2c +a39a529543dcbd6c0088cc301bd82173feb5f18d +2b8179710437421d5a1f0d281515725ccebff3f3 +087f4bd441c0032caec0a1f65a139d336a09d133 +3636a522fd19a2ae4ff514319d5c1fc012c4bcb6 +c5f38edba57d815658097ba5ebe37532ca160d7f +b0f17178ff8d37e5343119bd8917e262f386203d +dedac076c1649b0edf55353e9bd374c0cd4ad956 +9a3e0978753d5354eaebdcec8550641314c71b83 +0c9881539c0c5249e911dd70b37cf7f74327b97d +80e02450839ab9c1a08082e404c6f0398ae2e92f +6e85c4357ae3dccaae1b354641d22a359a100d47 +b05f9fa99ca30d7ce2611a6deb139f2274d1ad3b +306a3efeb8ab8919079525b9aa747093bdcab563 +98c31464a9052c1ccddc9cbb71c2529f3fba6f4d +35ffb93ea7e6cb006d5019185815f03c67b94d77 +2f3f088adba0256b27e6da9efce4293106e34291 +02d287e484a9da84424d10eeb0c8f3ae52cbc70c +33dedcbe9423f6031122f8be1f7c2c69ea4ad4ad +03765604d9073697904c2dc4cf29e90b924f36f0 +d2845ecaded68fdc5b372d10c3663441ec8b358d +4cd67a6ead5211ad92e1faaaf71ef28f7da2f593 +806333db217efd7e2a4562bb73e695bad88e712c +1307f117d423d143e6083faa99255c2bf2a2f3fd +c92b96b8114ec521af30fc090cef40c07f9544d6 +04b3d6adf722b3b33524c8d288c74e7db2632a2a +ee5db06cf8ca3010774965c3674c273c680c1611 +709e0af32c2463474cb8ffb85d2dbc07960037c5 +d503ee11d8f7d43c67841ba1b6bd863a6180a223 +709555f0f163e09098b58d03898a9e0d6e7ca0b2 +010f73477cd20c14cab78ad9cef350ac8c0f55b3 +9e95f4744d105fbcbc32a12db7287cb64254325c +6e47b7f6a76cc2728e61f4bdd30bef697d6490d2 +5620139a78269335505edf23a902bb0c9c264e3e +70908608fd62696f99cb3f7a185b226fe32e475d +0e907ac0dd02b47f4a0726790d01f0c57037ec2f +bab2d77edbb5ffb3a7938f16fabd7ad3cc83fae9 +f5d5c855f2e708067e3532980aef101d20c40cab +c50895475b8a401824cc9f1bfaaa8fd7797e172c +98690365a6ab1e82c25ca08c26db63a834c21fb9 +3368d60307efa2661820d3240854967fbbf6fbc9 +1f7b9e38af5bfddef1649c83b119e45063bbee34 +3fc0feedd683b49702d0da9d7d3c36b7be02ca09 +2014aa562c1d05dbbba727db120d9e163fb8f43a +e22249a8da0886b4c1338dbf2e54b766b13f4db6 +8344960457778ea0a4fdbba33e7eebb69aa979bf +2c7fb7b897db7f304961e919cd5ef1a5a93877a4 +e2b409c9d97825acad579abe22e0a37b685d6ca9 +b2fadf8584ecefe2a32cc2eba6590d10dc8a9d26 +5f201c66bf26298986c3dd2aa84818a312a596e3 +8dffb657ffd1e331b99cc00cebf18645e219da12 +3236d66fb8a63916b6fd00c2f2ec417b5cde01b6 +95cf024d3f1e40344f16cf4faab052d6fb1e60cb +9faac843955464da41331af273942e38561c9a8f +d383fbcb1a69ef97a660318a2b36486e5fdd6a44 +9939959cf9cb1a14497e63aec0b88a08ad3e451c +3c0c3ce2681718b816289eeeb3ac343ddc037fc4 +259d8cccfcb9b9edc00d757ec6efecde6fc06110 +9057ca8e09723c9959f923a412e409ee793d0062 +79f5ac0831ef03c2ebb40d325758350937a55313 +3f36f6f4d3317275130051db2405459021f56b8b +e35c3d07dd54243acf4298ee0ce6ea7e4621e90f +115a59d5c4916cb14b4c408bec36bbc6116043cc +5d563efeae0cab135ec70ae4456a4e55bf598aff +2d8d73eade954a63f892414accaf2db229ff3312 +d42bd8a35e147633d3d750266939c6539aecece9 +27155662fda1f5febdbb42e6572dda8d9e31588a +a210a653a08fd0460b52c7eb68bdbde0c40ea63b +fb4be2d8538e5e4418042eb7d81491dc7e94dcc5 +53940e5d960d1b63e5ec84fae802fcd599b20f01 +213dc667b6c665a4257c4afef5e5fd39d42eb01a +08a16f7ffb9968774fe4562acfb79aa6a1a59a2b +d1caa4726d8ac1d9ad611708038db896828f06f7 +67e3d20bbadc184c57efe184ce8ccc402de23bed 
+ae05bddb7e816fd0e14e95cc525e06caf9392918 +b2bec4804d38db4d01520c4b65f410acb20e4d2a +78a8c13605a8eda09a0ac0f04910b414eed6b765 +ebdd1d2b3891d6f0de29ffa1eeed3f03bbef7912 +d54da603155d9e507b81d7188e1baee2f984a99a +18823ffe4e7d30056229c6b0c3b71f9c72c1d2e1 +86b8d10094b19ab1059b5dd7983f26fc2bb133ca +a16c62ef8dfc132a0a5c406e429a08e1d40b8756 +64e19e8802e2f598c5a84858a4b2c0c43b99877b +3fe31bfa86777b3f4a1bcbb46650f683fa477935 +d6e9929980eb730124e8cf6561991d43f19241e8 +20ffd27dc3be9eb895fe8a5ae3cffcd795ad100f +3960b049e19e3217968723430f3595fb1d4e1dab +846e738db5d5df03f621e5cca067016e84327f16 +2c20a17cc4846b8dc437fd00f84d08cd15d0c8d4 +16cbd9a93ee9067271748479378a31d24390e048 +77677862965b241d7b9c4ea61836ccf09b3e37a4 +3ec8db9d06345bf26aad0ccfd05408880946f4a7 +3747faa432e732538f1636c9aca56f068ec44a4d +f1959058e2074a54c0bddf7afc60131df132415a +216558f2fb3e918840acc2fca7c81f27c7a80e3f +1c7e55eaf41d1e43121755c1cd667d210e45a000 +ce5c91d45d83f9f114814c8db9a1230b2d79eb02 +e735d473af54e1ff29a66b379fff9e88ccd8a164 +7809602d8d9398f05b032bc399a922af1567c56a +e85a828dc7853ddcce5d7d919b07370236fd089d +0f81b75410062d52138ab8a67ae49d03321e991f +c9bfa4787bab601fe2e0749b4fb1e44d3f168373 +c03e79ef13869270df1be0c63ae86dddb7c21bf9 +91e1856c8de122ef09c10589afb9b3728bba9296 +0a661af9686af6c8f298c8309e8e1a96ef0cc08f +08102a4509565732289f843007d08cfa72ea5456 +9b9b7248f513f621089a6cdb956828a3fa6da09b +7e4ec1b8fa3a477f43f00075da2ed26a31db45f2 +4c5589e14718f8d6ef4027baa22b680f556d9ce9 +f63c67c039b3bfb83b3d46f4250e3509c2e9394a +8500584842b1a7abfce6a2453fb9f76c5b39d26d +5f43c3db85393d73b57174a6e3c72884cf1402e6 +1401f556e033d9f10dbdf83e9b5bfcf6a84823d1 +ff7e6d2a2c5fca5f33db717bd68228538fa09f37 +9d6addd57bfee73721c64830eb2d0fd27e8fb9bb +a55a50b6cd898fc79bf4657fef0f0ad44de6a5fc +6af08a408468481f5847013cd8b7f9c0ec7296ec +7d54beb04ea368c6386dc8174ffa1915b3414bb9 +93456d2e7f067d518838df8cd7f32ee85289f4bd +d603f66ebc365627756eab740140ed43f0e5f40d +e9217085cfc52f0fc47d91f2feb681a33f88fb59 +d0912f63112be8069398b3f6c926c727469f1191 +2d1edae390d9f079095606c8bed0a83f5bd5d767 +b6136dd5f245f26dece12bf294d524bf584bed69 +c4a4d6e24e7753e098e09324e903c3fc2cb45f74 +8f49dee3dcf6b505e43475e3b7c15a5e25f0d85c +edc1c91f5eb0547c18877e123cf3ec248ac734d7 +f9f269f3df343d14b11c40286b22f2c54d74d8af +99c98449dd5a99222dae7cfb14bc060852f220e4 +017d27d00eb43678c15cb4a8dd4723a035323219 +61e137b37db0b3157c04fab0a5f4314fe4b03931 +43f54e39221310d45fed028b202e0e26490846be +ff5b1552320e183941d8d58f726f589324035284 +f140814244c9e54cf4ff2085d7d52b2dd87d2737 +0264d85da73237f1967bcab20b2f99313a00250e +7d2215881b5399038a625726794c523be20e567a +77810c807b3c7452a00968927dc8b3b76c2aaf63 +361864a24f139d975cb02736e81d106b6b50de37 +5b08a44dcfaa7da30b066b62e688177ae4c27bc6 +f0bb651f7498ac35c750d4216b3fbdc1c6e83508 +0865448cb045a8b9568e679dbdb5b752ba0e38fc +afbe85965b4aae74bc86d5c56c75fe55e782c7a0 +c4e68babe61c2389be350f11dfc8e2c5ddc9f032 +7de82ac3cca30893284f93cc133d87276f39f8df +0e983ee75b47509844fddf43d06a989b3448376f +645ec5713498f91b494d39bbe8ac6619a20d45e8 +2a853caa0177515501abb206103e15fed7bf2315 +6b675d840afe29591d304e7b52a1edb442decf2e +aa4246332705bc11ed706555620cf99aecace692 +461aa6c463d8ed8a3485519f8347d3e8fd30d5f7 +0963c147bc9d5370ae2062863e776853744c64a7 +b6bc7591f950b6647f2d5cbf11bcfaccd8da0ec8 +aa3cbdb196eb266ebeb48c1be941df20ecc1bb90 +945cbe99df1af1b5db99d8dfcec142e5d0452065 +9c2b9de4b8928f63bfbaecc97bddee210e2cd38a +6ae4c366fec9f8ffb28f74e03fee29f300e4b0c8 +e9797953e895ec7596bb0c80d6c3e13a6170ba32 +4d63952d88ef8b61c631d92744b8b88d5900ba82 
+6c668fb743f9af4bb080654040e6416f7e9b5605 +31d2a88aa62215e0046d4db0c0cfcb7390e16762 +941f9ba5091a41a41338a0b5c06ef998ab76bf92 +4f31114f7ead2ec76449bdfba502b576c8cbdc51 +636ed1de3d915dd13e94ea6f83ed418139898672 +e1b8a490189840089a0e42f357d7e18aa04d695f +e92c4914629728b8c18cf61320cf4a34baa77300 +9afcefd2944149fff4d5b74f5b26a39288b7cd59 +f802525632b1c8fa85b43911f07d8129694621c4 +c7b0320fd85f3ef25cee88621de6eb541b399c36 +ee5ee7b755e26ac0eabf5191e7747f6d72ddc84b +65a03a7e863b3a5b97576bb3fdda2d8c4380c706 +cefa54e79f57eec0b1273f69ff7149dcd90c7ee0 +c2add6ebc7d17385f7e0d0d9fca5fc98115c68fc +169f5f6ab3818fc14b9f2471ee0d8dbd61d5e566 +9862b8aab2db9c82fd1012792783a90ec79f7269 +0e1e33051f7d782d2643d645eff67157c37370a7 +8ab2735a3a614a5e95b2f53fabcc04cc482a0abd +1d400e7242d8570c79f9f34c392ce02e217e01b8 +c56d3923764328f6767dec2e5617f562cc88e791 +f9689fb9656132e1c6d186851563f2b968643791 +08d845c78055627eb898cb74bc38274794351b17 +ed3449c7f2b4a2f4f1548af509dc9ab1960e9fa9 +0892fc2908f83d76b147c3ba1847af0056a47e9f +eeb9ec2b66bfce439d6ad3f25e364d3b1d826bc4 +55dc9832dc56cae9f0bf180d2103a1d20c1b1ee8 +b05f9fa99ca30d7ce2611a6deb139f2274d1ad3b +3152602658285f9edadaa1d9cb7cc4948ab8fa54 +ba620c801834cdcd41547b08712734e30e84ae52 +c02f067640c67b1aa5013207c2c7782ac6b97399 +3349e092bb3bf21585d52e72e2c782692932b139 +26e816229351dfe7578c758ba07c4d2d2a891b2b +064f086f49fa410b664d59a0494367c421ed2f8a +1241b04b4380b1a796390d32183e3e738d7b82ff +79879bbee2c8f0b46fe44c80949e24b3c11ff7fb +2f4e7d5a0130b48ba687536a3bd5623fa906f9a8 +a72c848bc3bffda7aed21ece2b07327153fc11f4 +b10cfb970a746327ce47764050473ea27b15f649 +b30975204e2d948c1ca8d33a9f6e755f86d8e200 +e54abc8237ffb5e2172f192200fbde85a100cdcf +0eb8e5740eecc2098cd862cb5d1ff41f9aa97eb5 +0807b672dd1a7ee6f8038649f70a66cfa3ba4fed +e22b4d2a35411b0b2270871f83c19e9f6efbfa67 +6bb9d73ac47b68b90872d97b9ac1e1aa34ae72fa +c83faf99c08fd4d44d9ee38d1c3ef84c273909f2 +cf91d2b46870970ec013ea2ef0567f695ca80261 +151ea6f1dc4a40cf854a8b2d9fed22ea457d2afa +cd29f730499023601901dc9ca801c279637c5a81 +4fbaf01100e4d6ee1823f1b25ba309fe73ffb6d9 +49654512a36b27837b069fef447ebcc460b0c911 +09df70a379653872798f1284efe95240944f6af6 +43e9d988417d90e85868aa09b5c53e2ddf0364bf +5730ccc0f1a125be76253006f14a6d3a39fec5ae +65012f2f3ec9d16629eb8577d149de30257127dd +ae05bddb7e816fd0e14e95cc525e06caf9392918 +a65f3d75c5cbf99deccb00c9b94f91b5ad52a050 +bf6d04b98e0af89f073f4b71c5125017c9aa079b +6f1f25365bd131c0caf19acf0f4fd02a3535f538 +23a87dcd1007f73c4a6278d230aacb6411c71266 +a6d88d33454805c4c3b9f3c50b1b2482048c32a2 +d6392fc14b8c5e61bb6342dfada3b5085dfa691a +ad61eb84269497ea2e8d9e6f3b1a504d9bf82d7f +b90e7d7139f69d50d53d5bab66a560785596bcb4 +237f4b3fafb5bca89627701cabdb01a61ddef306 +8df29408cf5fbf40bcdb5a73d9eff3e30b928638 +02f0cb84d4e8f2c78189f3008c327db6a7dde4b6 +d7449a49a1e808c3d2f2c87f6b6b26dc8cbfe638 +096fb4a6df33a35a8a4c28cf6707d6093b8fb483 +54ce333c923bc3d8107ed1b803575c249d92a7ca +390a82f6f49cad470b3278465d07a9320c163fd3 +b44d920cfa42cb0ea8e279c4401f565577217323 +3c2a82b4460be3eb08988c038156f24e690ce149 +fbfdc61792dcee3d0102859ed2681489b037339b +5d788ec362a874cc113c2204b06fae82d1d70ea7 +e4a186c4590ba156eb3e45862c2a5b4181e2fab3 +5f7c94ff1e4c755c47343046fa0ed6823124b85f +aae35772a13f84876be5fadd919f1265159acae6 +22680dc843e4692474815b0c9ca78b9f4f1a116a +1c23f12e590b2cb4a89314c0b933f12b7193a37b +674cc0ecfb854619f3e50df0e4baecc67c73724a +fe41f09089b134bad7f40be0ea4a6fa7a691655b +12327d1afb02007b3b736570856176234edfa8bf +26fe548ac5f3ff1e700b2cc6890c2d5b152234b4 +861bbb99177d314267023bf3699ec069f3bda6d6 
+cdaeefc0fc597b0b591b76d20f979c1207e98880 +2a370c32d2c1464da03bc2440bc96ad23059e428 +6bba3c06659ebeaeca823bb7517baac4425faae5 +e7ddfb8e15144c1a1e48d8b98ce1a44b666c18ae +62dd1f31cd4e2c8250c587b557f4c2be67d5e495 +3ab9f2c8f9512bf98dcd467971b3ffc6d612d308 +00120f91cfcab17bac165f7a4719019a628a9db3 +3ee94d218979b459196743ab0a3d2957f72422c3 +6970a0ff24ea28a7500763ec1b72a671bae412aa +ab356240d60a6e7d6efce1a9638415f13bcf6591 +39d0bcbfe75fb7bbbee7d4bb72a77cebf03e39cf +ae202f1474cd1ea41a5172230fb083e1bf932d17 +7dc9c7399c2e313bbc6264072f6a592b6915b082 +fa79ac193bddc262fb35a468c8bdfaae536bbd7d +412630c97420afb50e5278d3406e0cf4b08d0b77 +eb1cbbbc594b324145c3155bae5614a2553a17c9 +c948920cd3dde6dda8767bafc8173c0c37127430 +43ecf30e43686b2a11f6b329f5046a68082b7272 +49b2330923275c10d5faf66681fea724f9938893 +ea6c2b9b8479325e3c081252b59c61047988736c +d28da9e2bb92814351486125aa35b16d112f3a76 +393a89e4d5b67ce3e29678656d73a0dbc2ee930f +daed716140202e583ad4cdc98fcd8b4b3aa5ae35 +51ce09bfb256ad9ac38a8e071f36b0097f6fa68c +36beb23a74208850b1bec50c966e985aef7e4075 +52894d78609a1022f6dbc4ec4fe32fcc31fa9366 +3988b5a02009b7589973eeb2cb929f2d37c4f409 +cbf788987b75c11aa5c49518c5b3cb45e2c177df +ba1d997f0df5b17a0899bb643a467e95332cf0d8 +4a0453760d334ccbc84d00ff67a87865fabae97b +d39cf4acbfe4fbc26e001266243644ae35beb712 +d05b5e0e3619b87fc46e731ced5111f47ccda50d +bb98ef0258299046aecb9fe0070ec309d1cac401 +080b5e433c62201fde1221066f4d723e3ef427dd +92a6b2e9d9e7da09ab27eee906a8c38c0219f390 +04d7b42001e7249bef064a175aa1236be8211e4c +75855c9acc21043af85c9f161fdb0f68af165771 +2fc69a299b1a7ce0a9dd2552a910f319773f3eb5 +98ebcb90dfca1bfcc7bd2cecf53cc12f7dde6970 +db8025d7b55de50ac56606023c838bb9975dceee +f1530622d659a31a36b4cc5b79d3ad9302ecb384 +a9e946a8ee4153ae7e45143941da7f61fd04321a +c6f17e5ec10ffaed02b111f02af4afa86d347d3e +a7ce2524995e668268028e9f7237dbfbae3cadd1 +407f1d56cdeccb0e313c15ddaac53b186acdbf0c +a8c0086c0ce76f960aedff7a7d28d9009751cc34 +e28316232274fd9444562d8f7b5b6949072d0a2c +ce5ba5110238d225394df833987db22b197dc93d +32f66b696f66911ca0c73e36ee32708d59124f32 +242c50a749fe607d1a652db0e06f453d5d7f80f4 +4375cd51b7ac544735b9c89df29db30369b0eb9b +1458f8a2a10e49686cbc0b5e14a97acc3aa78a30 +c6ce5c4febbfb715453373d447c4b5572f5fcada +0c145a6bf87af0340fe06024ad8ccea391c9134f +b5d3ce4d0549d7802002f2d5e90f05ac35f5db5a +7944055eb6cd49f12b5d42ca0b971eaa6dd51e07 +63c0d33115ed9eedd51c7f34177a113f6b40156b +f2ab65bb852cc93aaeb521f0f65fc2ffd14e996c +c7147ba3242871a59b80860245ab60c3d04c5ecb +d9accb542e2321181468e8f7e490114b30c1cf53 +8ae25fe05b21fc819243746ad7caf4555e11df0f +a3ddfaae625902b0394f854d6b341b21684638c4 +1a71c481191a57d4ae387450f040d1da83c10eb1 +7045e190e940ad597893d85b3336afa77cabe20c +714db9c2b78eb2e4b26fb94c3927bf372a993900 +85ea5ac1300194927b58530756575dbc84dd46af +18de5b9eb32fade90cb550ca65052bc1e0095a99 +667ae97a088538b0b321579c5b5bd12fa101e04a +da9609278b099c165aa343793bc2e03c2ed17752 +27bb197b70f0475abd00cc0db2ffa53de84c9e75 +debde12bfd41f1960cacadf1239f1b50db2624d9 +c7ed3ba86b0e3978955714855a42b4a7d8c67233 +e5572a346b97cbaadbe68f0ec35a09d923a66383 +a729d22dafc85162347b87dd530c05caf64ae2f3 +c12d47630071fbbaa5b10507aa97f02c58aa37c7 +6d9afc0ab84073e890da12d0332a5987ab659d68 +240b251022182eb14ad96aa9f558150f8cb4c543 +000e009f6b1d954d827c9a550f3f24a5474ee82b +a8fd170d0ff3f6178900977ce422ddcded7c6c43 +8c8d04a5274fd92716fcf0926aea0c06e83e7987 +61e06a233a45c987979139488084ccd0012d466c +6d5f7fa2062d3e5ca89760dc09b13a16199d1359 +7e84933971ea1853295b9d73e4b75f3478498c72 +e10c7f72bc3de187cb7adfa31a1f098d0f47bbf9 
+f450b642e3e32641ef9878aed22f732d314a8c4a +ef17977a93067d945566356f538640febe56157b +c2d40cafa9bdfbf9d04d096d09a6aab9584c3ef0 +87b1981c3d51bf560e628fec4e65e4bc8f54566a +f9f7fc8e9da723e776abddb6f7e836fe72136eaf +c2e3a01f8e6f8a021a1551f72529f92c9a7703e4 +d7dcb4acee7cca3e54b11ff196c0c26528e665ae +3c2452d9487b0b6b5426fc7c502b4d8115236051 +e430e7bb4e1316f3652bcf9ac93ecda4aae3729b +e21cec4d51eaca6fef39f717a12355853c8e25a8 +0a5c494f13f21e009f531c2a56543d274a8c5932 +3f68dbe78481050ae64297153361f374956140ce +a54ee0a7552c6d6a5fd5ff6e0b67ecf511a8777a +a1d08c47cbfe06de1206493bee12f301386725e2 +a91d9dc6a9ad7fa6aca2c9ca4d9c7aefd1503585 +991ee479aee8194c495fc11e06f91f7b33809161 +39405d7bb73434ad12a0106c15cc194689eb4de5 +89275be2a434addb83a29a275b63113f4500e328 +24a1a847d4d5f74b57a02c5898af9364aa83debf +6ef88240d12dc57a102450dd26ee7a0510a848df +3299e38fb5351c11d9beba7400722773f3b74e6c +99f61cef6386573c8cce688a30fa2cc82a1dc05d +a49f8c20eea4af67ec54408cca737bff98628769 +7cdffadd7c11226ad6973a1707e404fef96dd541 +d853339b5945d07de64c5d8738e89259dbd40401 +137797fd34ea672ca506d1c8848d0da355baf7d9 +9b00d0ce1008b4c3765bf57817dd5241bdeb5c8b +81984aa7d079c46380e4baaf49d078c86466edc7 +78e6ca8bc83669866fdf9fc5ecc19797f4011261 +70172cb5f244c48d2cb41621c35858ccfdf31997 +a509b26bb05b83f9fbdff3465a2acfef5b35ae30 +694c620e98472213a53e932214054137e278a073 +67518afdc981945e4cbf620ff05c773934607a44 +2d3dd578113df1602cf753d2b11a4e802f616990 +4abddb8c6e0df6689eee21bfe27aa231d0ae8dc9 +ee3ed04d53a5c7d1f60a5fc4d7c6832a7f32d3bf +f76269f3e3c431fccf5d9991a8a5da27977646fd +ba30d4f9339aca62283d1df7756b42158f637931 +31dc8c2da848a7eb21c8287e23990cb3ee8b6307 +68ac8fb1f847b3307be2a6a9a0fe66235a5e8c4f +8ce0548b386ac1e48150a945cf36dbb6a0bd0ae7 +2b4439bdb73d8cdb6637d275f426f13135d415fd +3b0b095afa3ef1b73a2bae29a5a131bd02c0e714 +9170bc3cbca5d8f82b02ae1e33128c62fa2a00a1 +4ecac34a02791711bc456edca64c086b9aef357b +d70512d3069e6532b7069fd0c8fda28d75324293 +9e39cf719ab85cc10326ed1d9df2273e75b67b89 +65c44ce096871da2588e1c140ac91ef771fbae97 +a8949852e1a6258f3e7146d5a0b073861d12dd56 +4baf74306ea4d6d60d89c2575484dbb111cbac83 +4a410aafb82e10f1cfee2062b5cf2e038a3d12af +7c7cf4f235cd2c455b2826e96803b1a6a47ba4f8 +d42801a7b4d67e49ed3d417db4efc7dc6d4b5ce0 +9964dc1ba45079060e594be6429829042854b4ff +db275395dd0a2455ae378265850a90a3025fac09 +65f5e35193414c2998a1b5de2f959cc785f1fd6c +19c0f1cd0bf5780a7c2a8abd5d5d8dcdbcf2fb86 +279e575b7c82e95beab30d37836e1e56176d7ee3 +3945e22c503109659c8d463d4674d153a6f5e8ec +e553763df924e731b9aecc68342af73ccf47ad2e +128c6f4780cc59cfb76bde414ce42bbd544efacf +ab3ae67419bec16ebce20cdbcf76f2e8508b35fd +42986dc0132012b6150eb6066fdc1047d57fbf29 +7ca2a2cf7fcf4afe8dc3ebb4e7f8f9a599d5748e +7c478b9c7f099c8149ecaf11b917d41b5cb36011 +6cdb121b56dcc9cced06a26d1b11bdc907e4aaa6 +2d60a7c9131bb6044c9636d42f6888295a519dcc +883b0ca02ebd243bb393bfc6144974539735d64a +d4c710c356bbc78529b427336e4bf7163a904239 +3952896b9156a43e6e2193cb3ae8a71a0cff6923 +79b92f4b2e09bb0bab7af1b036c03bcf075e2682 +6e0f11b530638be7f478b43a715dd3bcd6b17d04 +e7d4a4d0b37b35569ce85c261d6c9ce9b57558d2 +bad9a32c0b3f74e4f54de56f37f24a265f45fce9 +4ec3467fa91e3889a3ba2d695c863b5207ebe9c4 +d8a1ee24cafd2ad6a648b4b62be7b06f446b1a89 +d61dcadd8afce804e85d4b40ec5eacdf37f04fed +1a15d2dc834fbfb276c67a2cd73d7e8cf650bfb2 +08d5f0581bd3f7196c90036e40f615c43c97eec5 +020a823e09ab8f8c2f13f78aba48cd5549848cb7 +fd091455b9ed6ec71dc4eabf4b59ffb5650ea2fc +c0d25282a77168ec25c503cdba87a0b16f73e759 +19094aa75ff7ab5a9331eeefd36c15a201b0ab62 +2e74bfb1f3a9e72a00f6727067bb42cc6d8c4db1 
+e3cfe5af31c4c3e4cb9bc30ddae635241b476b19 +d030d0a5f16a4087cc56137190fa0e7ddf19dacf +fb15d9bff157666a98e09d0e75cb0f05d9998e51 +64cbcdde35d49cb5220009a855561f6a440c91c9 +faf5e36c67de12654252e4890b40297de6f0f18e +f7ae25006ab8f8e2beeb218acbc5d273376d54ec +0b95bd0ff7b9d14421fba10a50634c26f3bb0692 +9db0f5d741239f0adf441c69f7037f1143c99fa1 +1a528a69a27510d5b3036ecef3f8ce416cc8a9b0 +3f0ba68515f730c5edcd6b5f7a2487672238b381 +28c9b5d17fd3e52e27b5b8d6d5338f823f8abe96 +52a4638c81b3feb5cbd2b66987b1c7fc1ca7ae59 +10af2962663aabf4b56357038b430adb7b2d0986 +65961456d11269a4191a41b0f0a0f2d92fcb6907 +46e7cc2ba0bf218cb004f58ebc249e5e72b8c29f +b616399a316a7816941a498c09de81c3ecdf0f03 +878cbf5db93ed95a2ddac0927543addd0d6105f6 +6d48e2be404813f7d346516d519369ede95d7226 +c968e9268088153bcf51f3555b80f69e7f162db3 +405f21274bd606e89a0366cd8aa82e6dbaf8050b +2b9872f1248cda295127c4374dbe49850b81d95b +c94ea8bfd1a74b0d93a2a207a1234b0ef1f73d0a +b0f4c1bd78e59b33cd73b510dac2b45e3cd735ef +158c5fc595eb5ebdd337f44438d98d5581a87756 +a210729626a48d3c75bf2adef15d856d0a9e5918 +248a68920a184395f2fb66fe69f7a2b1276e0f95 +8ceae55b2d091350328e94bb7e3ad1b2048efd6d +26c43e258d65949742057d164454efac73bbb63b +cd76aa45608dd3370639d3ae4d2e774ea7c3e5a9 +b645b284600692840ecb34473db3394bc354472f +9caea797113b583b5ab74990ea22db63d14c2f99 +9e9394a307c29b74289f20464554131438b34216 +90519a813017e881d6d95e4df8952a393d1a7726 +7dc1bba5f4ccf529d19ed517880a10491df307a0 +653abdecd41eb6b1cc3315bd4a6e5819d1831df8 +a644add72093f735a99ce94b304e91703f250b94 +ca1385936bd95b3005b923bff4ff0077816e3d68 +e85217770edae3f88d5114fec35166bf7a80e4d4 +47bbe920c329ac749fa3dcfb10570fdeacc6fc3f +dc36563ebbfbccc065d91fa24fe84f9b0402ed68 +1274f8b5947b2a5f87801d40503ef5b8c883771e +694ec8ba0a9dc85fe62b5cb5041b71198936be89 +c2fa24208e4bf7d91592ed094f88713be35fa708 +1ee394eff8bbe8488411ecb68712b0a6f08280f5 +2849071dfbc3e18241f7a5243d4ca06e4418174e +c0880199e5c76be3640005137c2c383f0c84b57c +f0814435ad279f8e908c65049775a8676ce15f94 +8669e9660c67b2489c0e4308eefe20b8fb3d2cf7 +1c61acd00fa431d425fc79b0c90ecefafecb3ace +999e5dd9f1857d3f65650882fa2cf6d19ae3b9ee +04e19a432042f4044bf0d51e3657f890f10cfaca +91a2a3da8d7ab82cb4034056381a44c4848ff19e +1c0a7f35b6eef0226ed7af5ad8ac87ce07fff38e +3458f834c56fbafded76527b01578e5ef34b9b42 +80bb4f72ab3dc526c23a3dff758e7777cf1b3c09 +6c4bccb7aeb5aac67d498b8da720199c63e277c1 +9061d2012210b95c86401af9dacb0e63ac871657 +fbeb39fdb55d24b827bdd578cd6a471a0a1063b4 +748b0badc59cdacb0717ac7a55a490f7e0ab4d71 +70952d29c9b7db955fbabde8800a629665a0d24e +a576c35a831b1e889631b757ed86916341fc7202 +a5858355506446cb36f949d98bfa811e7d37e76b +a5f97dba2ae0b7949bea49a0e7068a1c6ac42ae5 +1c16e278550ac208d9aa1a65d0a9795f4132bd5e +5a2cf867e368a77f135a855cd1de59ba5fde99a1 +132d7c4172cc25eb59c7745e6d74cc4a4dd88dd6 +061dc0ca6fa3ee55aa7e688910169e4e6c74257b +e2ee40eb4145cb1450572a7837ef544802b99866 +b2b0c6a0f14f3b76df69046861fd04972ac9f3ee +dfc83bc2dc59d24775e3e8228beddf9e654167d1 +8ea9a921345fa2ea894bd9b953081f15713224cf +c6b2ae26499e736ac081af57a7b41c39a7b97fda +abac6c071e35ac30cfe3317089061124ac301495 +5e0f057dcb8b6c21806b379cd349d85598f5bf39 +b4623e58d28e1d790a508d26b754a752f70c288b +621f194999cbead9449bbd7222c6e8852c5043c0 +5d952c025f36694c06917bb1a5395fa13ccb84d1 +a0fffbb1fe7c929f520de855ce045b840272cca6 +e2e8fe4405767b62766efa00f95dc7b501e9eff3 +d24495608f98c48e6f3030d4af691b009d09cf41 +826c9569ab9e52eb031dd692baf84337eb217cd6 +470ec40f14d3d077afc6702a1c1c0bb4baaeec57 +281237526c3d4125250aa204bd6798e16cca4bc3 +2fe0fa9e25453b1797f4cf786c40eddd64483d3f 
+d231ee5ec82309024acd028a83ae876d9ffcce94 +71732a91bd25ea50aec127f95b7f8b8609db3da3 +793ec3ff30c242c570c9a9e8c95d78b05c7489ed +4b5cb898edd34436e4065c5d3de05c2ec7d95153 +23d0930474aee4957dac9571e06d40757b5535aa +992a3b15640c2613b5481fbe2cf022178e5f3ff8 +e8c0ce0985596758a82b71bdb6759c72af43d06d +846bb87419fc959197879e04dc9c15f3723555d7 +8c6907fb70ecd74ffba960283bf596155a7fb273 +1cc3cd345edba8eedbcf183afd6e746b5b29a422 +f3e48a4d193edb98933989cf54dfb46310ffdd9f +30a4a422254a4026dfb77d2660467994b18b1eb3 +5f5ea4d8846ad79c33bb149e6acb853ac78b4247 +c6d0ae6864fc9ac5307e23d283b1ca4b291b21e7 +2de834aa1cf63d1e6b7098c5528e4d021f131f00 +55e79f31165cd20502922ceda572d3b7db9cb41c +6ee19b994fea7c9447b05e9dab49350e2f8c1377 +de44cbbe8fd64ea13caaddab77560a48806c2180 +a2619695901d714b44c3941aad3689a40abcf363 +485385e26c8fae0a7efd34ee11ba645662074a13 +8035d023b91b92978788fcdcaa6062c38883f4ae +38a7dee7cd042726d64a95d3c5c3d341d656d68e +0a280b46be0fde5d87ed47fc7e970e3ed494cad6 +aafdf3a5bd71126c9ad07d93285966dc04d40c85 +93cb2f4e32053398d3602e0cbdaa12e8bca062ef +4ab039bae14499bc4f432f9f20a2509fe9310fb0 +29c411d098ce2a631503bc168ebb0ee6f65df497 +88ce7a6fa029b6e8f51c92f1666b02a404b827e6 +4f98a1294468c67a563b48d1ab6b4766a6d899ec +ef0e221c7edde75caf0d5bedc0d93745890a854f +94a1929f8845d841bb1f47667c1e489ab21bbc56 +4ab4b43b33178d53a5a445283d6be39ac57106de +5ae9d4c70fa9dd4cc70037f4920ef15d4fc63d2f +08af6504291f39451465dd1f1df6466e61c4595b +23be51cd2399b9825facbac2a88475450c5927b9 +750af4d960982c1655e9edc08470aada3c72a9a8 +47f51c462114ca3590df18c3c96da04a217b79a1 +e9e46f551280ffcc98b45c3c9b18085ed14f38ba +cb0ce99d40bb9d942aeaf08cbb83b075927a96d0 +62045a56ba933fab5e2bc61be05eb5bfb81a8527 +9112ffd4afcd36702f2e6ff7aafd653edf2557f6 +b1fadf90fed9739d72e98b56727e471e070b85b3 +75fba14591fb8de7567bd2378b5c5c114bad77b1 +7e81e309cfbc2385f04c4d377c4562efac6ca238 +0a38f333c2a4adc64c5f8d074508632418074755 +f4aa472201d2337eef2115dae23439e0a6dd9663 +d1adc7acf92d644b3e1a821668a05b024974c350 +699fc0441179a3cf82b303cbe25bd5a3be551ea0 +697a6cba6628b2f233f2a1cf317fe8127e4d05eb +397e0ec274130aed3bc1bdc461bad41c485f629c +02aba4aeba128defbfa587e3f07efdef724666b7 +5e31470b18e9dd499b9f8787056cf0e68d52e055 +9507c23d0741682f71cde608ae517c0c1ad2a4f4 +16c115ba95f7b71292bf5c00a1d425a8586c551c +42ef162afc768b86b881bd4c59beb8839149d76a +a7714cc78a381995633711d95465e883b613ddd9 +91519a0367b9eaa66b5ffe27964ca4c913093aed +e90d736f7f5e32ff845a898036c529518cce0c6b +82b0860745881e030c57a3d1bbfea46bc404bce2 +26c3e814a69b6335ab65aeb2c4a1e97015595206 +7990c00e8ae3117587f54a880d7d20d0578d4646 +260d91a2ece614587559ea3bf37f76e4d5a48beb +bc19bf14032da3bd5d3e6b86fdddd47f80152747 +1badd9612877ba84a92b025096fca1e0a36f07e6 +4f339c24142d0442f20301c1992d523946d1c6d3 +fc89b29738b18fe4c0ece96bf00f2cbe687e45db +ae3e7378f86cac99783c3de50d0c073e79a92759 +a1e6044b7e31b86d42b6dfb7ddaa1eb6bac2070a +948ccfbc7690989a96170839cc5d622e12e0b044 +9ec16c5ec0fd561efdf57572fe22e3e768ecfdc4 +c1849b79963362d71d09ff4cea2c46f9b3a03d89 +e82a29a4c2fcb1ecaed942c6fb550a14b916345a +b8412b898cf77763bdb3da689bb1bc9d10447116 +147a073799722bed54c3606c8833cdd58b1aa1dd +79f4216bdb44dc618f168d2a5061481350c9a38b +c4352c74759634af80f1f6acc69c55261dd12acb +b6a68adc1d771af97938d64d3c21ef4fcc99cfb8 +b5f8a569e73948a0930d18622740f52ab91c1a42 +fb784ea50559ccf087521510e7760473038cef2e +b7aadd7612b6a1970dedfdc175ec4780a8732703 +0146da64eabaab5d7f53e1ce58aeb9e74dea18f8 +7465d748cb38e50921b446953ef27b0c0fb6abe1 +4cc4e5eb162c622f786e1c98a9e00237f5687ee6 +a95f85c1672b2ff74f860a5980d83b440715deae 
+afb99fceb6c0532a769b61a81e8dfeb7cb70a86a +8a7b29224fce56d21e0b4d8b83cb42c32a4a2e29 +2a4c040c4c53d763d1263d8cf797e0b672c154fc +a81140222a3c6e2323cb290f353d595686473491 +642db49c7519de4227b0dca5b23144945bbf54ad +40df1682f34e463ba031f077e211a1e8eb1b7e0c +cb778e62e3d6b15a836e50d65a18a269a8a82577 +3aed3c2855ec616b87c4cf79a69298ba45c427a0 +f18f929339d5ab26ce8e26e716da4ad095474768 +8e03a13f5223f71d02f875b4fe4e48cdc1ea3738 +4443f53a766617ae7c30c48d8cb55d6fdc3ceb30 +f38b2e2284f48100513689571ce9d41cff63bd4c +a5182de1c12d3ff131a5dedc6130e02b43c3b267 +9054ed7b6b3eca10003d19098e5d3c51a8dc071e +2f0833d2eed57049454b3e0f41dc02eb7587bcb9 +a9883044091acb92e2edc709d5136af372d06ebf +1360b846750bd2c7e31cfe015c77c5968a9b541c +156253c33dee7a50df8e9e5e78adcef72705f3c9 +42580982bcfa232a30d39a26fbcd605cb041e092 +5ea3d3524c6c824950289855e33037576e741d30 +87c21fd886502206ada74a652082ec8dbb0fe7c7 +958b77776ac602ff78ddee5ecc758ee170cb5fda +0baa5e5fe65401be934349bd1f067b31a4a0f0d0 +0f18a06bd539d1de1e3abf38469e1d14030ed41b +cc06d014fb7fc7f5d0fae8c3576c134a281ca14c +cae072ed5710b9cea48c8cb0b011dc3a9cdceacf +cc7928b6593cb03aa125a5865684da7fc0405d74 +9f71dfac529fa72a12235f016cd481b02192c3bc +2f77ee6c43941f768b9771bca5a02332d89bf80b +cb6bd932f7a9e1c4845bd0d974f8983f2d5d6968 +a3b808778e2f90b6605b09b68cd7f3eb4659477c +7e7f7e823c05955c57123af1b61dabeaa5221825 +784321dfdb51fa207c790a3f670f0022fd575775 +69891ae41f6320ec437455913c5dd6d76a0241d0 +a243d306ec4c022a4199b6e160bdeba677415fc8 +1990eae2d51f66f9b5dd3b2d2beaa17f2b95599f +4d55ea7163e490d3f37218482269961898c62a87 +4d33676bdf0c6738fa3088d5e972ceeadd3730e9 +54aefea5f3c14105ae08f09aa60ba5f6917b1b88 +d0fb72f2df7282c349193c0ea47af281034a2c32 +284f81ec4d1297d3949ed95a114f4c10011abf40 +83fec7b4d265b21ae38e07c6e1046416b7758993 +f78bc049eba41b15d9e2ea28bff38e508b0e71bc +bb940fa349ab09d69edcb5f3a8fe96e55cdb69b8 +4dce40768f628700555244e91a69c5775d6caf6c +d176f57c12f30fae319ccd5b50b3096837767ed5 +1bd87d9bd116cca4f00aa031cab25897d35418bf +684d8d97b04fdbce1a08fffb59e1e280318cdfb7 +99b1b03fc906723790db2ebd04ecc51b8ed52052 +ce7ff18c9588042aaf62c8c71c69f769a16c4a7a +ee14d42f2b34f4bb5bf90d8c813934aa5d6b5e01 +d7f730f7658fb4af7d492e848fb759d031726e34 +caefbcb40174cb97c8361dfaa7899beb20202509 +fbc427e1bc2cf82ac3756c8c7de4249b52e56505 +a2e4de47a027a36757d181f61e2d3fa6dde7274a +85b17ae766f1da36b8ed0556a932d63bec08c785 +36ebbab9aeba7a8a04ceb800b2e445a85e4b2c0a +5826b9a1cce4a960cbf4516004b194c988312730 +a06ecf2bf25af0a6b32be1d6a82ba618d9ecbb33 +8f03971de78085457c1440e3ca545ae5cbb5230a +06588a8ab74f068ec61b89de9ca03a28f5ebd6f4 +72bd7e434c944937912039c7cf79c07bd40241f4 +14b5d1ee3b508505b96a3f403f1b6685e110c3f5 +6ce0eec1ba71291ba928d4a825e582c919a2457a +c25eaa87d7ff1d1fd503bfb7049a41bbf282e916 +0ca9829040ed3d37f3df6341e28becc8df839409 +4e516c3549d4aa6a057dadc9f9f6f9aeabfe35db +8a63d4ed82617bb5f3da2ab351138b4690c9e03b +0956df18b019953eac5eeaf6eca49674af37e52a +a8dedb9efe2e9bfe658503702a0602fbefcc3316 +c0787c279f755fe76464ca4fbc94e24add71e3ac +b11cae312129d1e47a4102f87ad8e1f0781d34c7 +248e4202dbe0d45e76e930b614578206b3dbc383 +a0dc9ceccd24357326241c97c07df17c93e77420 +5468413e75a18f8d7acb2d26c2b80bddfc9adb99 +7c81d09ca7a80c686ba8530986cb53e555eb60a9 +8df13df883dbd7e8944d8098b74ebf3aeb4b735f +e2ee2a92b8f493b2960c4e1ba2abf4f2a54c6758 +c470e5a7568645a10488f402443f3701f69403a4 +23e3897002ff686867b2372767d5d8f121cc9b4a +f1f670d0fe617fb374b15bcc20110b89b6082aa4 +b17028b6a57a1301be1bb2021cf51d6fe4bdd354 +d3c8813d44913745f4ec4253e048af17d4cb159f +b1b3a8940587229a063dc836cb0422065ac0d292 
+ff7672f15b344e93c02d0d3b9676b8070a735e93 +331073170c761735eab3c9a516903016c2aad8dd +07605b39c50fbc320453c583ee749ae4f97126d3 +e710845e4cc7eb6a1d99073dfbf6f9278c24bfa8 +0cce9dd80952ff900e8704e6115f9c1bacae894a +2ffdb3f488210d4ebbe41759618bd8c6d15878bc +16d1cc466220c90c009bad3f09c2a085bdd47d5a +1a71e84c0c599408ec18a189dbd779d5e20d4e21 +51de15d048a6f3b0330e8da198b2d17260ce8c85 +36ac969d234f196366b404c9c714c3b8d30ddf6a +244d6af0cf929f993a2ed2de0ede4f57d501eade +a834e49430e3b3b1cd596dc1338a028e7166643e +2e489b53225f71cc5b73f9aaef5c692737c0f6bf +1d52cef8af071ed110d3ef8feb3e4b275dfddd01 +ea0e27967a6c62875355c5f423e4962835c5921a +02535aacbfed4c3ab00a0945d59933dab54f6fd1 +0cf6cbdeca67c729a260b7c1f5710b7a1e0aefa5 +ca42191efe091ea06d25dceef9ebd84df8ce75e4 +c5001aeae4fe17f8b7ccc1d6c604727ae63c35e7 +730b9668fda289b194a3b66a53fd3745ef42ca32 +3207c07bc7ace3a01ad233641f1df91ab37a505e +de657634cef20a388d43127a184619105d110a27 +dfe850a4b3c6c002dbee134a112f16f8e1b974c5 +1b0e7715e01a62130ac573c38834b09274a7a866 +99c6160a2f6e22b5040bb47a279f81b4224fb222 +ceadf8419c256716569dce2c60d98dd703bf2cb1 +d8fb13c8444f71e7f309d7ccdd7ef329a47a4df3 +75805d5f1f22bc6fcdff850c88a4fcce7dc3e17f +e6bb6b913b34b30af0e19a93bca4f55b39579f88 +c29d27094de54106cc903c2e0dfeb89cdcf9ae02 +41ae19f40a339b6b47fceee00f512d849df292be +04090c2dadbd1d446a8364df894344687131f841 +ba47d6e2e838b11290d702d1fc03261d27ba59d5 +ebd463e2b4b89a626e16b43071b06f3145cfb661 +78141478f00ac19912fa2b283e8c91e30eb3a7c6 +d8a4f3fba1b67bb6848489e45a92e9c1229ff7d2 +929b032a966f563e8401285e4d96850b17f640da +82a3799090db99bdf611599094170b85bd4eee4e +620fdb835eb7e095e9a34f8a165843f81fe50328 +66f89413b6f050fb903d58b36ec961461145af82 +fb824fa4ce932e860604ac21db4b555c6ad1114e +d8c925f283216521073497659088f4ba707311c9 +a28c2815223f89026b6a198415a1291cd67eca0a +8201b77f669191dd01caacbea1e3b5ffbab92962 +dbfd44e667bdeeb17295ab40d123ddae70d3daff +f324f1736d24f14c7685df0f2a2cc4bb20999fa4 +296f977687e8ce959a2e38129ce1c0d31e755d8d +e4888dafd50eaf43e1476701bd26bf940865d973 +17ca1470986faac5115d246d3f9b78244b7215eb +0904469f246fecf43062b2863bc81f730a96b20e +09b7e506802fe6fa4a12154e322dddbc34553f9c +b526d3ce8d4649e96446e1e8947b674001fe16a5 +46665024a071b4916afcae4b9ed3cec0aaeabc7b +36fee230f41e1fc89a26b1b7bc7e884862dbf56f +702806939cff2095b2ff97a08d84bc14d1dfc5ae +30ea107c7831a846dfe6828947249489468f3ef7 +8d51b266df630345c667bdbc07f172b906e627af +3691b1bffd90518b4017ccefd8c15ffaa8d87d6d +6c81ff344b4285b42f2733cadf42536addd736d2 +6c15f6261e0d7d09ca59071955ce30d09bbe97ae +aa3acca1a17c375731214851c56020878929a068 +c4cf20cc2e3665ba0b7d948683bfa1e82aa9b7e2 +9ef570f878a8c2d9460a99ca523b835535de67d5 +7d76f893313ec0b855d1dd6ce9b8fc9bc77723cb +e7ad68df97b2c9bcdf6e56cc017301f84a7f9b4a +8f5a3860948e5dc213ed825fb4715f0ffa013ce3 +27e795d99164a2372106c9e1f118cc19258e41a2 +fb438b2cad9b7583f4eda4fbe6fe9e9cd1f59f10 +6c6e755b03472223c69700bb166d81d9adf080b1 +8cfbb990201cb91fff3db779885041d2b5c52c1e +eef5130bd17ede5cacf8be5881eab0c09a538bda +f20d8a304a9009a79a54867664bce33473947272 +aef5dcb164dba680b436bbb37faeeccbbc4fe2b4 +b412608a7f30af28fb8615e4b522b7dcecabe212 +799415d8ea5094bd6cca8c178d6d8531827da191 +ef5ba9f7f4c954dc6208e9a47fdaa730602fa27c +6204780ab854a5443a52c343534637fc227dd70b +58733bf4d2489d1823a432b2f515f22fa835a88b +a8f30d02868c8ffc924271d9da99e0c180477a1c +c8baff658f6506e04d7f530d9b266ba2d4b632b7 +87a31d871a336bc60987492515a20ef25d18d0d4 +e2929ddb475b033444f85c3cf7e5ca38e84ed7e6 +c7fb0295ad6226798e65332c841f6a1508eb9efe +db80674c14610f0b964fb574ac32c6984cd226f5 
+ebed652b9c7ae1784ab032b2023445e8b8cbaa41 +08ac1c4c2c7589f889b2bce3687a724d0c636c40 +dae2a0e1c908135eebc98a0db33ca435ebe7ad5a +3d415472346209c9e90706dfd313728a0ea15003 +df08670661d8887644542806a8d69046e3ba87ab +32a96ae444a08d6ae828b34539aec76a835a95e4 +4a0465f87d082b8e9a22608da161f232e8d6f464 +c7f47a4f22bcd6f11e6ee97e9687b5e917d9e495 +9b4dec196b29bcc98a377d6f433638a85177e0c9 +c0f1425ba0cdac23bc342587ce6ea6cb53515c55 +ca6f373a6c76d4a4284240fe5e88c130bd56d27c +785966c05fb5fe10addeca3a86f1857329957fb0 +fd71a64340425384294a115d3a42bc8069ac9f67 +08784cd3a744ca0750c746910124a6056d46f608 +ee9284abb97ecdc3ed78a4807303124652924051 +c9cfae108e2aaea3b12010dfd0404f7bbffa5c2a +d7a63c81f8bed7df99b942d88a380c100e74accd +23fc6eff1bbf238513e2f9c76e40762f01b1737d +485c7afff53fcb4f694a5b3cfdc09c372cf73e18 +8656b25529d3e5aabde19eb42d10aec5d8af2088 +fe954e108708531e155eadf4945fff5e432c57b3 +0f5fe6ee00187bde832acb092e42431f9fa8430c +8827ce43536f7246e21f643fdcc1b1ad44c05a12 +869e1a290cb6ce44eada26c00d5abee0e5c2ecd5 +a2215dc789a33e1ab3be1dfcc03e8f7f02d046d5 +62a233d2e55b159001ed622fb96b9444fce9c11d +26fff6559df5149b98c3366e7c01236daaf2b1d1 +115e024f021871b307a7a315aef720bbffe1d54c +19719df575d3ae0d8c93c037f7f1972b9e10f1ba +3628f33e8ef1350912bab8d4ae467c7e1f3056fd +e123e08e23278c95e399b3b11da411325135da21 +77b3598df08e6f3a2b4ae157904e30d5aa2ad49a +e071ff877d67787d0a6582ac3dcbcb627dec9ac4 +722a05a34115832ebdfa990a99bd999d097a0ce5 +3dcebec3361c047d19cf639879437ed5b769e7f2 +8f37fd4e1147e623fe6f8cc6d190c304467d999e +5339c690ad044e082f8a31bfe929099d7e75531d +82fd9658604cefb93728b198e73889872ce7d70a +804983224e3f5cccbd52b26bebc53b88369c448c +562f9fdd5811793c11970c856d21c7f0c32118b8 +5914ca61115649643f88ae110eaf3da4b112e6e4 +f44b0a2d303b725a7f5c82048d7423858e78e490 +de5e6430a7c1166ca82500ab7fb82cb95cc643c1 +c174eab1c3615c3ba5dbc0c6c30ac67ab6b47024 +aae0341ca8ab04c9c169f4dde3e2e943d758422a +517dc966b1379d84a9ef741ff9ca43e281868c60 +436fd9441cf9517b6e8b5162db78031228b18d9a +cd7e1db2eb4709309b43cc400d6619aff480484b +5c100f2e25d49a90b25685b9d3bb17a35e325374 +362892feaf8dbf44a0429d3676f9b5e4ea6a46a2 +ca8c5f96adddb61025107907704ec344143b0088 +8c97077f3dc6794837f887a8d57bc8d3c05e8b4b +04ba3c53b4068a8bcf31bbfc674d520ab2843a2d +c91c677ceb1093b393d46dd21252147c3ddecd1f +d85824d0d1dbc389c30ba584837d82e85c5bcd37 +c0f7e29dfb195770d68e6ee608c7129e72a89e23 +e55510a4c7ea27d0e47137479fcb16562f8d380f +8845f1c8a8b45987a6fe69bcd89060ba38475d2d +8ebf95f844971fbacb819e2e05fea4e27402a34c +5be6327602aabcb3fafaf439f69ebc621601d30a +698560f44a2c58c87988498dcbe51e30ea62c989 +29f215bf015e848c5af9a9c70e1e3e052016704f +6211582a40d5a1d67e930e337ea11f1b3538ef5e +80cb64b8ae5710be8044127b678bbc0e010e79a2 +7e613e66f3b7da299b8f4689cfa31da7bb381e31 +128131f6fd6e7bb018806ed5f150a207ae8c7e69 +f686c1c3a2fce19f177aafc281d6c724977a6dfe +56f58b9bd5e4a5c6aec7f2c5a4a04a702fc3f2dc +e63e6ef318e5cc205518f7fc052da7020742f55a +c19c8562df56700121a61f5cdbc8525a46197e1f +7ffa78b92966e11b0142829ae17c871b9f6b5c15 +426952f1145f112142141f26556313924ce7465a +f975e857f57f0f6d96ed50006a7b4e126edf1f1a +8a6220895e1d634d0aa0f41ce6882c98d7b495d0 +12da0f4b955b911a893158bd3beb9b24f1a0043e +ff8441521f15f11db3c60850a1ee551b81661fef +0b88599d7b1e25e59f2da8338520ec3325de9337 +0fda61a11326021d7ff0071b6bd8b2b3517100c8 +acdaf288f8a96f77e2c34104fadf26c04307f5fc +16d04e701ed59f32ea3c4226b553b6f0f50c7426 +7b759405d39047b5aa0f0c22d91c3d254fbaeba1 +facb5a7732d083c66484c9b3dbb274ff1d6a1ee1 +f959116e0606392633e8d8eaeb710664e4532c6c +febbd51aa5181f74d56f3d0e01d38e264444f825 
+90ffbc94fcd43cdbd2e54f5cad75d2a7d659bdd8 +61cfcbd1b8ef945165acef5e7145762bb510453d +5477b6eb53ccc404db0ac820d3d858052bdebbe2 +4c6156e3830087141b0014005bf955f1a87e1edc +12dc55dc446574144eb863292c3565736ce0bfc3 +a761ce0dc6d89ad3170a3b69e3d2c71bfd014b8e +8cf55dd9b1bd7a4c8350c81e469d92ec956af62a +8671360c5d830f38316ccc4f63362ded7a2d20a6 +97f1a15d8196c514517e76f1d80571fa769e28b3 +85b2c0a31be506ef27e0ca124be5a89c628de120 +935dfa6867b975280d63f75cdef228372adc40ef +63367984bfa6dcb0ae560d7bab812c622481920c +ec10a4353082865abdb5697560390653c3902065 +b7974f532d25aa2eda5e16e5dc58d3f726373c03 +f804d65a6009874a0c4d555b6e9d8d14cbf935ef +cf251f22dbe2c976d340eaa8469e1ea21ff88a42 +6998dad2b81635da9c328ef829a8b1125393c38b +2a073e0f2e510318e83c16ad9141f5c1a31cf6a2 +eb6f4e52212ccb96b2aa08e0af5949dc8c67a024 +09b9f830b520e68635c45a604641874e0f2bfeb0 +17a33c4bc2856e52acf16f3f86dd7053e340ffc5 +81f4e9eee7d046992f4091cd2d82a6a82981b354 +5a6746c9041d494e8f794e4ecfb6a7c941f5ccce +5249fba450a5865325c2b47ce5fac5a585b2ca23 +e35df1cddab6e311e0b4f0b732c555c51e8a739d +8f95ac3d57280ec506907f000e60b9bcb065b4bf +2750ae3dac18bcf9eecdf9127e5aedaeac19a67e +dc4d88520f9221eea943cdc54bd89e21e52677ca +bdfc42f3dce77e9e964ba2922c19faba2ca563ee +c3b349b83e4fa2389ee59ea9ca036001b358ca02 +3c992e03d64ea763d4b6db96e3371143294172b8 +f40f581bb9a644dc31feeea1bdc3dd6bbc42ccca +d59c8256b9451b83457299244fa9f81d0369081f +b015c20c7868a98a3cee9878553502c708fd96a0 +b6e30268a7f110d767dac9144454d2c6fe49eb34 +dbfc2a5e7753d96913593c41db73a32dac062ff8 diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/dataset.py b/retired_benchmarks/never_adopted/language/gpt3/megatron/dataset.py new file mode 100644 index 000000000..f8bf44388 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/dataset.py @@ -0,0 +1,167 @@ +import os +import sys + +sys.path.append(os.environ["MEGATRON_PATH"]) +from megatron.tokenizer import build_tokenizer +from megatron.utils import get_ltor_masks_and_position_ids +import torch +import argparse +import utils +import json + +from argparse import Namespace + + +PROMPT_DICT = { + "prompt_input": ( + "{instruction}{input}" + ) +} + + +class Dataset: + def __init__( + self, + dataset_path, + batch_size=1, + args=Namespace(), + gen_kwards={}, + total_count_override=None, + perf_count_override=None, + debug=False, + ): + print("Constructing QSL") + + self.dataset = "cnn_dailymail" + self.dataset_path = dataset_path + self.batch_size = batch_size + self.debug = debug + self.gen_kwards = gen_kwards + + ## TODO: provide arguments in command line + args.rank = 0 + args.tokenizer_type = "SentencePieceTokenizer" + args.vocab_extra_ids = 0 + if 'make_vocab_size_divisible_by' not in vars(args): + args.make_vocab_size_divisible_by = 128 + if 'tensor_model_parallel_size' not in vars(args): + args.tensor_model_parallel_size = 8 + if 'tokenizer_model' not in vars(args): + args.tokenizer_model = "./data/c4_en_301_5Mexp2_spm.model" + + + self.tokenizer = build_tokenizer(args) + + self.list_data_dict = utils.jload(self.dataset_path) + self.max_input_tokens = 2048 + + prompt_input = PROMPT_DICT["prompt_input"] + self.sources = [ + prompt_input.format_map(example) for example in self.list_data_dict + ] + self.targets = [f"{example['output']}" for example in self.list_data_dict] + + ( + self.source_encoded_input_ids, + # self.source_encoded_attn_masks, + self.source_encoded_input_id_lengths, + ) = self.encode_samples() + self.count = total_count_override or len(self.sources) + self.perf_count = perf_count_override or 
self.count
+
+    def _build_attention_mask(self, tokens):
+        """Build the attention mask and position ids for the input tokens."""
+
+        # Since we are not interested in loss-mask and reset attention/position
+        # is also False, eod_token is not used so it is safe to set it to None.
+        attention_mask, _, position_ids = get_ltor_masks_and_position_ids(
+            data=tokens,
+            eod_token=None,
+            reset_position_ids=False,
+            reset_attention_mask=False,
+            eod_mask_loss=False,
+        )
+        return attention_mask
+
+    def tokenize_prompts(self, prompts, tokens_to_generate, add_BOS):
+        """Given a set of prompts and a number of tokens to generate:
+        - tokenize the prompts
+        - set the sequence length to be the max prompt length
+          plus the number of tokens we would like to generate
+        - pad all the sequences to this length so we can convert them
+          into a 2D tensor.
+        """
+
+        # Tokenize all the prompts.
+        if add_BOS:
+            prompts_tokens = [
+                [self.tokenizer.eod] + self.tokenizer.tokenize(prompt)
+                for prompt in prompts
+            ]
+        else:
+            prompts_tokens = [self.tokenizer.tokenize(prompt)[:self.max_input_tokens] for prompt in prompts]
+
+        # Now we have a list of lists of tokens, and each list has a different
+        # size. We want to extend each list to:
+        #   - incorporate the tokens that need to be generated
+        #   - make all the sequences equal length.
+        # Get the prompt lengths.
+        prompts_length = [len(prompt_tokens) for prompt_tokens in prompts_tokens]
+        # Get the max prompt length.
+        max_prompt_len = max(prompts_length)
+        # Number of tokens in each sample of the batch.
+        samples_length = max_prompt_len + tokens_to_generate
+        # Now update the lists to all be of the same size: samples_length.
+        for prompt_tokens, prompt_length in zip(prompts_tokens, prompts_length):
+            padding_size = samples_length - prompt_length
+            prompt_tokens.extend([self.tokenizer.pad] * padding_size)
+
+        # Now we are in a structured format, we can convert to tensors.
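The padding rule above is the part worth internalizing: every prompt in a batch is right-padded to the longest prompt length plus the number of tokens to generate, and the original unpadded lengths are returned alongside so the prompt can later be stripped from the generated output. A minimal standalone sketch of that rule, assuming a plain integer pad id instead of Megatron's tokenizer (the pad_prompts helper and the toy token ids are illustrative only, not part of this diff):

from typing import List, Tuple


def pad_prompts(prompts_tokens: List[List[int]],
                tokens_to_generate: int,
                pad_id: int) -> Tuple[List[List[int]], List[int]]:
    # Right-pad every prompt to max(prompt lengths) + tokens_to_generate.
    prompts_length = [len(tokens) for tokens in prompts_tokens]
    samples_length = max(prompts_length) + tokens_to_generate
    padded = [tokens + [pad_id] * (samples_length - len(tokens))
              for tokens in prompts_tokens]
    return padded, prompts_length


# Prompts of lengths 3 and 1 with 4 tokens to generate: every row becomes 3 + 4 = 7 long.
batch, lengths = pad_prompts([[5, 6, 7], [9]], tokens_to_generate=4, pad_id=0)
assert batch == [[5, 6, 7, 0, 0, 0, 0], [9, 0, 0, 0, 0, 0, 0]] and lengths == [3, 1]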
+ # prompts_tokens_tensor = torch.LongTensor(prompts_tokens) + # prompts_length_tensor = torch.LongTensor(prompts_length) + + return prompts_tokens, prompts_length + + def encode_samples(self): + print("Encoding Samples") + + total_samples = len(self.sources) + + source_encoded_input_ids = [] + source_encoded_input_id_lengths = [] + # source_encoded_attn_masks = [] + + for i in range(total_samples): + if i % 100 == 0 and self.debug: + print("Sentence:") + print(self.sources[i]) + print( + "--------------------------------------------------------------------------------" + ) + tokens, length = self.tokenize_prompts( + [self.sources[i]], self.gen_kwards.get("max_new_tokens", 128), None + ) + # attn_mask = self._build_attention_mask(tokens) + source_encoded_input_ids.append(tokens) + # source_encoded_attn_masks.append(attn_mask) + source_encoded_input_id_lengths.append(length) + if i % 100 == 0 and self.debug: + print(f"Tokens: {tokens}") + print(f"Original length: {length}") + # input("...") + + return ( + source_encoded_input_ids, + # source_encoded_attn_masks, + source_encoded_input_id_lengths, + ) + + def LoadSamplesToRam(self, sample_list): + pass + + def UnloadSamplesFromRam(self, sample_list): + pass + + def __del__(self): + print("Finished destroying QSL.") + diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/download_cnndm.py b/retired_benchmarks/never_adopted/language/gpt3/megatron/download_cnndm.py new file mode 100644 index 000000000..ec47912ea --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/download_cnndm.py @@ -0,0 +1,57 @@ +# experiment config +dataset_id = "cnn_dailymail" +dataset_config = "3.0.0" +text_column = "article" +summary_column = "highlights" + +from datasets import load_dataset + +import numpy as np +import os +import simplejson as json +import sys + +save_dataset_path = os.environ.get("DATASET_CNNDM_PATH", "data") + +# Check whether the specified path exists or not +isExist = os.path.exists(save_dataset_path) +if not isExist: + # Create a new directory because it does not exist + os.makedirs(save_dataset_path) + +# Load dataset from the hub +dataset = load_dataset(dataset_id, name=dataset_config) + +instruction_template = "summarize: " + + +def preprocess_function(sample, padding="max_length"): + # create list of samples + inputs = [] + + for i in range(0, len(sample[text_column])): + x = dict() + x["instruction"] = instruction_template + x["input"] = sample[text_column][i] + x["output"] = sample[summary_column][i] + inputs.append(x) + model_inputs = dict() + model_inputs["text"] = inputs + + return model_inputs + + +# process dataset +tokenized_dataset = dataset.map( + preprocess_function, batched=True, remove_columns=list(dataset["train"].features) +) + +# save dataset to disk + +with open(os.path.join(save_dataset_path, "cnn_eval.json"), "w") as write_f: + json.dump( + tokenized_dataset["validation"]["text"], write_f, indent=4, ensure_ascii=False + ) + + +print("Dataset saved in ", save_dataset_path) diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/evaluation.py b/retired_benchmarks/never_adopted/language/gpt3/megatron/evaluation.py new file mode 100644 index 000000000..4e32289fc --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/evaluation.py @@ -0,0 +1,76 @@ +from dataset import Dataset +import numpy as np +import json +import nltk + +import evaluate +import argparse +from argparse import Namespace + + +def get_args(): + """Parse commandline.""" + parser = 
argparse.ArgumentParser() + parser.add_argument("--mlperf-accuracy-file", required=True, help="path to mlperf_log_accuracy.json") + parser.add_argument("--dataset-file", required=True, help="path to cnn_eval.json") + parser.add_argument( + "--tokenizer-model", + default="./data/c4_en_301_5Mexp2_spm.model", + help="Path to tokenizer model", + ) + parser.add_argument("--verbose", action="store_true", help="verbose messages") + args = parser.parse_args() + return args + +def postprocess_text(preds, targets): + preds = [pred.strip() for pred in preds] + targets = [target.strip() for target in targets] + + # rougeLSum expects newline after each sentence + preds = ["\n".join(nltk.sent_tokenize(pred)) for pred in preds] + targets = ["\n".join(nltk.sent_tokenize(target)) for target in targets] + + return preds, targets + + +def main(): + args = get_args() + dataset_path = args.dataset_file + metric = evaluate.load("rouge") + nltk.download('punkt') + + dataset_args = Namespace(tokenizer_model = args.tokenizer_model) + data_object = Dataset(dataset_path, args=dataset_args) + + targets = data_object.targets + + with open(args.mlperf_accuracy_file, "r") as f: + results = json.load(f) + + + target_required = [] + preds_token_ids = [] + + for pred in results: + qsl_idx = pred['qsl_idx'] + target = targets[qsl_idx] + target_required.append(target) + preds = np.frombuffer(bytes.fromhex(pred['data']), np.int64).tolist() + preds = [int(p) for p in preds] + preds_token_ids.append(preds) + + + preds_decoded_text = [data_object.tokenizer.detokenize(ids) for ids in preds_token_ids] + preds, targets = postprocess_text(preds_decoded_text, target_required) + + + result = metric.compute(predictions=preds, references=targets, use_stemmer=True,use_aggregator=False) + result = {k: round(np.mean(v) * 100, 4) for k, v in result.items()} + prediction_lens = [len(pred) for pred in preds] + result["gen_len"] = np.sum(prediction_lens) + result["gen_num"] = len(preds) + print("\nResults\n") + print(result) + +if __name__ == "__main__": + main() diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/main.py b/retired_benchmarks/never_adopted/language/gpt3/megatron/main.py new file mode 100644 index 000000000..183278e52 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/main.py @@ -0,0 +1,112 @@ +import subprocess +import mlperf_loadgen as lg +import argparse +import os +import sys + +import sys +from backend import get_SUT + +sys.path.insert(0, os.getcwd()) + + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--scenario", + choices=["SingleStream", "Offline", "Server", "MultiStream"], + default="Offline", + help="Scenario", + ) + parser.add_argument("--dataset-path", default="./data/cnn_eval.json", help="") + parser.add_argument("--accuracy", action="store_true", help="enable accuracy pass") + parser.add_argument( + "--mlperf_conf", default="mlperf.conf", help="mlperf rules config" + ) + parser.add_argument( + "--user_conf", + default="user.conf", + help="user config for user LoadGen settings such as target QPS", + ) + parser.add_argument( + "--max_examples", + type=int, + default=13368, + help="Maximum number of examples to consider (not limited by default)", + ) + parser.add_argument( + "--make-vocab-size-divisible-by", + type=int, + default=128, + help="Make the vocab size divisible by", + ) + parser.add_argument( + "--tensor-model-parallel-size", + type=int, + default=8, + help="Degree of tensor model parallelism.", + ) + parser.add_argument( + 
"--tokenizer-model", + default="./data/c4_en_301_5Mexp2_spm.model", + help="Path to tokenizer model", + ) + args = parser.parse_args() + return args + + +scenario_map = { + "SingleStream": lg.TestScenario.SingleStream, + "Offline": lg.TestScenario.Offline, + "Server": lg.TestScenario.Server, + "MultiStream": lg.TestScenario.MultiStream, +} + + +def main(): + args = get_args() + os.environ["RANK"] = "0" + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "29500" + + sut = get_SUT( + scenario=args.scenario, + dataset_path=args.dataset_path, + max_examples=args.max_examples, + args=args, + ) + + settings = lg.TestSettings() + settings.scenario = scenario_map[args.scenario] + # Need to update the conf + settings.FromConfig(args.mlperf_conf, "gpt3", args.scenario) + settings.FromConfig(args.user_conf, "gpt3", args.scenario) + + if args.accuracy: + settings.mode = lg.TestMode.AccuracyOnly + else: + settings.mode = lg.TestMode.PerformanceOnly + log_path = os.environ.get("LOG_PATH") + if not log_path: + log_path = "build/logs" + if not os.path.exists(log_path): + os.makedirs(log_path) + log_output_settings = lg.LogOutputSettings() + log_output_settings.outdir = log_path + log_output_settings.copy_summary_to_stdout = True + log_settings = lg.LogSettings() + log_settings.log_output = log_output_settings + log_settings.enable_trace = True + + lg.StartTestWithLogSettings(sut.sut, sut.qsl, settings, log_settings) + print("Test Done!") + + print("Destroying SUT...") + lg.DestroySUT(sut.sut) + + print("Destroying QSL...") + lg.DestroyQSL(sut.qsl) + + +if __name__ == "__main__": + main() diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/mlperf.conf b/retired_benchmarks/never_adopted/language/gpt3/megatron/mlperf.conf new file mode 100644 index 000000000..c31c0d7e2 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/mlperf.conf @@ -0,0 +1,68 @@ +# The format of this config file is 'key = value'. +# The key has the format 'model.scenario.key'. Value is mostly int64_t. +# Model maybe '*' as wildcard. In that case the value applies to all models. +# All times are in milli seconds + +# Set performance_sample_count for each model. +# User can optionally set this to higher values in user.conf. +resnet50.*.performance_sample_count_override = 1024 +ssd-mobilenet.*.performance_sample_count_override = 256 +retinanet.*.performance_sample_count_override = 64 +bert.*.performance_sample_count_override = 10833 +dlrm.*.performance_sample_count_override = 204800 +rnnt.*.performance_sample_count_override = 2513 +# set to 0 to let entire sample set to be performance sample +3d-unet.*.performance_sample_count_override = 0 + +# Set seeds. The seeds will be distributed two weeks before the submission. +# 0x8ad40a335b00b614 +*.*.qsl_rng_seed = 10003631887983097364 +# 0xee765861048ca2ba +*.*.sample_index_rng_seed = 17183018601990103738 +# 0xa867c8b40e66b636 +*.*.schedule_rng_seed = 12134888396634371638 +# Set seeds for TEST_05. The seeds will be distributed two weeks before the submission. 
+#0xcb41409ba0dd1540 +*.*.test05_qsl_rng_seed = 14646058500348515648 +#0x10c102904c81c5a2 +*.*.test05_sample_index_rng_seed = 1207248993894122914 +#0xa4db284011bcd876 +*.*.test05_schedule_rng_seed = 11879132697760422006 + + +*.SingleStream.target_latency_percentile = 90 +*.SingleStream.min_duration = 600000 +*.SingleStream.min_query_count = 1024 + +*.MultiStream.target_latency_percentile = 99 +*.MultiStream.samples_per_query = 8 +*.MultiStream.min_duration = 600000 +*.MultiStream.min_query_count = 270336 +retinanet.MultiStream.target_latency = 528 + +# 3D-UNet uses equal issue mode +3d-unet.*.sample_concatenate_permutation = 1 + +*.Server.target_latency = 10 +*.Server.target_latency_percentile = 99 +*.Server.target_duration = 0 +*.Server.min_duration = 600000 +*.Server.min_query_count = 270336 +resnet50.Server.target_latency = 15 +retinanet.Server.target_latency = 100 +bert.Server.target_latency = 130 +dlrm.Server.target_latency = 30 +rnnt.Server.target_latency = 1000 + +*.Offline.target_latency_percentile = 90 +*.Offline.min_duration = 600000 +# In Offline scenario, we always have one query. But LoadGen maps this to +# min_sample_count internally in Offline scenario, so set this to 24576 since +# the rule requires that Offline scenario run for at least 24576 samples. +*.Offline.min_query_count = 24576 + +# These fields should be defined and overridden by user.conf. +*.SingleStream.target_latency = 10 +*.MultiStream.target_latency = 80 +*.Server.target_qps = 1.0 +*.Offline.target_qps = 1.0 diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/prepare-calibration.py b/retired_benchmarks/never_adopted/language/gpt3/megatron/prepare-calibration.py new file mode 100644 index 000000000..846e492d3 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/prepare-calibration.py @@ -0,0 +1,59 @@ +import os +import sys +import json +from argparse import ArgumentParser +from datasets import load_dataset + +def get_args(): + parser = ArgumentParser() + parser.add_argument("--calibration-list-file", required=True, help="Path to calibration list") + parser.add_argument("--output-dir", help="Output directory", default="calibration-data") + + return parser.parse_args() + +dataset_id='cnn_dailymail' +version='3.0.0' +split='train' + +instruction_template="Summarize the following news article:" + +def check_path(path): + return os.path.exists(path) + +def prepare_calibration_data(calibration_list_file, output_dir): + if not check_path(calibration_list_file): + print("Calibration list file not found: {}".format(calibration_list_file)) + sys.exit(1) + + dataset = load_dataset("cnn_dailymail", name="3.0.0", split='train') + train = dict((x['id'], x) for x in dataset) + + + with open(calibration_list_file, 'r') as fid: + calibration_ids = fid.read().splitlines() + + inputs = [] + for id in calibration_ids: + calibration_sample = train[id] + x = dict() + x["instruction"] = instruction_template + x["input"] = calibration_sample["article"] + x["output"] = calibration_sample["highlights"] + inputs.append(x) + + if not os.path.isdir(output_dir): + os.makedirs(output_dir) + + output_path = os.path.join(output_dir,"cnn_dailymail_calibration.json") + with open(output_path, 'w') as write_f: + json.dump(inputs, write_f, indent=4, ensure_ascii=False) + + print("Calibration data saved at {}".format(output_path)) + +def main(): + + args = get_args() + prepare_calibration_data(args.calibration_list_file, args.output_dir) + +if __name__=="__main__": + main() diff --git 
a/retired_benchmarks/never_adopted/language/gpt3/megatron/requirements.txt b/retired_benchmarks/never_adopted/language/gpt3/megatron/requirements.txt new file mode 100644 index 000000000..660aac27e --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/requirements.txt @@ -0,0 +1,7 @@ +transformers +datasets +evaluate +accelerate +simplejson +nltk +rouge_score \ No newline at end of file diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/run_generation_server.sh b/retired_benchmarks/never_adopted/language/gpt3/megatron/run_generation_server.sh new file mode 100644 index 000000000..441153729 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/run_generation_server.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# This example will start serving the 175B model. +DISTRIBUTED_ARGS="--nproc_per_node 8 \ + --nnodes 1 \ + --node_rank 0 \ + --master_addr localhost \ + --master_port 6000" + +CHECKPOINT=$HOME/inference/language/gpt3/megatron/model/ +TOKENIZER_MODEL_FILE=$HOME/inference/language/gpt3/megatron/data/c4_en_301_5Mexp2_spm.model + +export CUDA_DEVICE_MAX_CONNECTIONS=1 + +pip install flask-restful + +torchrun $DISTRIBUTED_ARGS text_generation_server.py \ + --tensor-model-parallel-size 8 \ + --pipeline-model-parallel-size 1 \ + --num-layers 96 \ + --hidden-size 12288 \ + --num-attention-heads 96 \ + --max-position-embeddings 2048 \ + --tokenizer-type SentencePieceTokenizer \ + --micro-batch-size 1 \ + --seq-length 2048 \ + --tokenizer-model $TOKENIZER_MODEL_FILE \ + --seed 42 \ + --use-ext-ckpt \ + --no-load-rng \ + --fp16 \ + --use-beam-search \ + --load ${CHECKPOINT} \ No newline at end of file diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/run_generation_server_debug.sh b/retired_benchmarks/never_adopted/language/gpt3/megatron/run_generation_server_debug.sh new file mode 100644 index 000000000..b89166234 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/run_generation_server_debug.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# This example will start serving the 345M model. 
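Once run_generation_server.sh above has the Flask server listening, the harness side talks to it over plain HTTP. A minimal client sketch, assuming Flask's default port 5000 on localhost and the PUT /api contract defined in text_generation_server.py further down in this diff (the generate_remote helper and the dummy token ids are illustrative only, not part of the benchmark code):

import requests


def generate_remote(input_ids, input_length, url="http://127.0.0.1:5000/api"):
    # PUT one tokenized batch; the server replies with {"output": [[generated ids...]]}.
    payload = {"input_ids": input_ids, "input_length": input_length}
    response = requests.put(url, json=payload, timeout=600)
    response.raise_for_status()
    return response.json()["output"]


# Example call with dummy ids (shapes as produced by dataset.Dataset.tokenize_prompts):
# output_ids = generate_remote(input_ids=[[5, 6, 7, 0, 0]], input_length=[3])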
+DISTRIBUTED_ARGS="--nproc_per_node 1 \ + --nnodes 1 \ + --node_rank 0 \ + --master_addr localhost \ + --master_port 6000" + +TOKENIZER_MODEL_FILE=$HOME/inference/language/gpt3/megatron/data/c4_en_301_5Mexp2_spm.model + +export CUDA_DEVICE_MAX_CONNECTIONS=1 + +pip install flask-restful + +torchrun $DISTRIBUTED_ARGS text_generation_server.py \ + --tensor-model-parallel-size 1 \ + --pipeline-model-parallel-size 1 \ + --num-layers 24 \ + --hidden-size 1024 \ + --num-attention-heads 16 \ + --max-position-embeddings 4096 \ + --tokenizer-type SentencePieceTokenizer \ + --micro-batch-size 1 \ + --seq-length 1024 \ + --tokenizer-model $TOKENIZER_MODEL_FILE \ + --seed 42 \ No newline at end of file diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/text_generation_server.py b/retired_benchmarks/never_adopted/language/gpt3/megatron/text_generation_server.py new file mode 100644 index 000000000..963938435 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/text_generation_server.py @@ -0,0 +1,270 @@ +import os +import sys + +sys.path.append(os.environ["MEGATRON_PATH"]) + +import datetime +import torch +import json +import threading +from flask import Flask, request, jsonify, current_app +from flask_restful import Resource, Api +from megatron import get_args +from megatron.text_generation.generation import ( + generate_tokens_probs_and_return_on_first_stage, + beam_search_and_return_on_first_stage +) + + +from megatron import get_args +from megatron import print_rank_0 +from megatron import mpu +from megatron.checkpointing import load_checkpoint +from megatron.initialize import initialize_megatron +from megatron.model import GPTModel +from megatron.training import get_model +import torch + + +GENERATE_NUM = 0 +BEAM_NUM = 1 +lock = threading.Lock() + + +class MegatronGenerate(Resource): + def __init__(self, model, gen_kwargs, log=None): + self.model = model + self.log = log + self.gen_kwargs = gen_kwargs + self.use_beam_search = self.gen_kwargs.get("use_beam_search", None) + + @staticmethod + def send_do_generate(): + choice = torch.cuda.LongTensor([GENERATE_NUM]) + torch.distributed.broadcast(choice, 0) + + @staticmethod + def send_do_beam_search(): + choice = torch.cuda.LongTensor([BEAM_NUM]) + torch.distributed.broadcast(choice, 0) + + @staticmethod + def sync_input(input_ids, input_length): + input_length_tensor = torch.cuda.LongTensor(input_length) + torch.distributed.broadcast(input_length_tensor, 0) + input_ids_tensor = torch.cuda.LongTensor(input_ids) + torch.distributed.broadcast(input_ids_tensor, 0) + return input_ids_tensor, input_length_tensor + + def put(self): + args = get_args() + if not "input_ids" in request.get_json(): + return "input_ids argument required", 400 + + if not "input_length" in request.get_json(): + return "input_length is required", 400 + + input_ids = request.get_json()["input_ids"] + input_length = request.get_json()["input_length"] + + with lock: # Need to get lock to keep multiple threads from hitting code + + if self.log: + print("request IP: " + str(request.remote_addr)) + print("start time: ", datetime.datetime.now()) + + try: + if self.use_beam_search: + try: + MegatronGenerate.send_do_beam_search() # Tell other ranks we're doing beam_search + input_ids_tensor, input_length_tensor = MegatronGenerate.sync_input( + input_ids, input_length + ) + ( + output_tokens, + _, + ) = beam_search_and_return_on_first_stage( + self.model, + input_ids_tensor, + input_length_tensor, + beam_size=self.gen_kwargs.get("beam_size", 4), + 
stop_token = self.gen_kwargs.get("beam_stop_token", 1), + num_return_gen = self.gen_kwargs.get("beam_num_return_gen", 1), + length_penalty = self.gen_kwargs.get("beam_length_penalty", 1), + min_length = self.gen_kwargs.get("min_new_tokens", 30), + ) + output_batch_truncated = [] + for data, source_len in zip(output_tokens, input_length_tensor): + output_batch_truncated.append( + data[source_len:].cpu().numpy().tolist() + ) + if self.log: + print("end time: ", datetime.datetime.now()) + return jsonify({"output": output_batch_truncated}) + except Exception as e: + print(str(e)) + print("ERROR") + return jsonify({"output": [[]], "is_error": True}) + else: + try: + MegatronGenerate.send_do_generate() # Tell other ranks we're doing generate + input_ids_tensor, input_length_tensor = MegatronGenerate.sync_input( + input_ids, input_length + ) + ( + output_tokens, + _, + _, + ) = generate_tokens_probs_and_return_on_first_stage( + self.model, + input_ids_tensor, + input_length_tensor, + top_k=self.gen_kwargs.get("top_k", 4), + temperature=self.gen_kwargs.get("temperature", 0.0), + min_length = gen_kwargs.get("min_new_tokens", 30), + ) + output_batch_truncated = [] + for data, source_len in zip(output_tokens, input_length_tensor): + output_batch_truncated.append( + data[source_len:].cpu().numpy().tolist() + ) + if self.log: + print("end time: ", datetime.datetime.now()) + return jsonify({"output": output_batch_truncated}) + except Exception as e: + print(str(e)) + print("ERROR") + return jsonify({"output": [[]], "is_error": True}) + + except ValueError as ve: + return ve.args[0] + + +class MegatronServer(object): + def __init__(self, model, gen_kwargs): + self.app = Flask(__name__, static_url_path="") + api = Api(self.app) + api.add_resource( + MegatronGenerate, "/api", resource_class_args=[model, gen_kwargs] + ) + + def run(self, url): + self.app.run(url, threaded=True, debug=False) + + +def model_provider(pre_process=True, post_process=True): + """Build the model.""" + + print_rank_0("building GPT model ...") + model = GPTModel( + num_tokentypes=0, + parallel_output=False, + pre_process=pre_process, + post_process=post_process, + ) + + return model + + +def add_text_generate_args(parser): + group = parser.add_argument_group(title='text generation') + group.add_argument("--use-beam-search", action = "store_true") + return parser + + +if __name__ == "__main__": + initialize_megatron( + extra_args_provider=add_text_generate_args, + args_defaults={ + "tokenizer_type": "SentencePieceTokenizer", + "no_load_rng": True, + "no_load_optim": True, + } + ) + + args = get_args() + gen_kwargs = { + "early_stopping": True, + "max_new_tokens": 128, + "min_new_tokens": 30, + "top_k": 40, + "temperature": 0.5, + "use_beam_search": args.use_beam_search, + "beam_size": 4, + "beam_stop_token": 1, + "beam_num_return_gen": 1, + "beam_length_penalty": 1 + } + if args.num_layers_per_virtual_pipeline_stage is not None: + print("Interleaved pipeline schedule is not yet supported for text generation.") + exit() + # Set up model and load checkpoint + model = get_model(model_provider, wrap_with_ddp=False) + + if args.load is not None: + _ = load_checkpoint(model, None, None) + + assert len(model) == 1, "Above condition should have caught this" + model = model[0] + if mpu.is_pipeline_first_stage() and mpu.get_tensor_model_parallel_rank() == 0: + server = MegatronServer(model, gen_kwargs) + server.run("127.0.0.1") + + while True: + choice = torch.cuda.LongTensor(1) + input_length_tensor = torch.cuda.LongTensor(1) + 
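# Annotation on the broadcasts that follow (illustrative summary, not in the original file):
# only the rank running the Flask server reaches MegatronGenerate.put(); it blocks inside
# server.run(), so every other rank sits in this loop instead. The serving rank first
# broadcasts the branch choice (GENERATE_NUM = 0 for greedy/top-k, BEAM_NUM = 1 for beam
# search), then the true prompt length, then the padded input_ids tensor. The other ranks
# allocate a zero placeholder sized prompt_length + max_new_tokens, receive the real values
# through the same broadcasts, and enter the matching generation call so the tensor-parallel
# collectives stay in lock-step with the serving rank.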
torch.distributed.broadcast(choice, 0) + if choice[0].item() == 0: + # Greedy or top-k + try: + torch.distributed.broadcast(input_length_tensor, 0) + input_ids_tensor = torch.cuda.LongTensor( + [ + [ + 0 + for _ in range( + input_length_tensor[0].item() + + gen_kwargs.get("max_new_tokens") + ) + ] + ] + ) + torch.distributed.broadcast(input_ids_tensor, 0) + generate_tokens_probs_and_return_on_first_stage( + model, + input_ids_tensor, + input_length_tensor, + top_k=gen_kwargs.get("top_k", 4), + temperature=gen_kwargs.get("temperature", 1.0), + min_length = gen_kwargs.get("min_new_tokens", 30), + ) + except ValueError as ve: + pass + elif choice[0].item() == 1: + # Beam search + try: + torch.distributed.broadcast(input_length_tensor, 0) + input_ids_tensor = torch.cuda.LongTensor( + [ + [ + 0 + for _ in range( + input_length_tensor[0].item() + + gen_kwargs.get("max_new_tokens") + ) + ] + ] + ) + torch.distributed.broadcast(input_ids_tensor, 0) + beam_search_and_return_on_first_stage( + model, + input_ids_tensor, + input_length_tensor, + beam_size=gen_kwargs.get("beam_size", 4), + stop_token = gen_kwargs.get("beam_stop_token", 1), + num_return_gen = gen_kwargs.get("beam_num_return_gen", 1), + length_penalty = gen_kwargs.get("beam_length_penalty", 1), + min_length = gen_kwargs.get("min_new_tokens", 30), + ) + except ValueError as ve: + pass diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/user.conf b/retired_benchmarks/never_adopted/language/gpt3/megatron/user.conf new file mode 100644 index 000000000..07a10bbe2 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/user.conf @@ -0,0 +1,4 @@ +# The format of this config file is 'key = value'. +# The key has the format 'model.scenario.key'. Value is mostly int64_t. +# Model maybe '*' as wildcard. In that case the value applies to all models. 
+# All times are in milli seconds diff --git a/retired_benchmarks/never_adopted/language/gpt3/megatron/utils.py b/retired_benchmarks/never_adopted/language/gpt3/megatron/utils.py new file mode 100644 index 000000000..0fb2fbcd3 --- /dev/null +++ b/retired_benchmarks/never_adopted/language/gpt3/megatron/utils.py @@ -0,0 +1,15 @@ +import json +import os +import io + +def _make_r_io_base(f, mode: str): + if not isinstance(f, io.IOBase): + f = open(f, mode=mode, encoding='utf-8') + return f + +def jload(f, mode="r"): + """Load a .json file into a dictionary.""" + f = _make_r_io_base(f, mode) + jdict = json.load(f) + f.close() + return jdict diff --git a/recommendation/dlrm/pytorch/.dockerignore b/retired_benchmarks/recommendation/dlrm/pytorch/.dockerignore similarity index 100% rename from recommendation/dlrm/pytorch/.dockerignore rename to retired_benchmarks/recommendation/dlrm/pytorch/.dockerignore diff --git a/recommendation/dlrm/pytorch/.gitignore b/retired_benchmarks/recommendation/dlrm/pytorch/.gitignore similarity index 100% rename from recommendation/dlrm/pytorch/.gitignore rename to retired_benchmarks/recommendation/dlrm/pytorch/.gitignore diff --git a/recommendation/dlrm/pytorch/README.md b/retired_benchmarks/recommendation/dlrm/pytorch/README.md similarity index 100% rename from recommendation/dlrm/pytorch/README.md rename to retired_benchmarks/recommendation/dlrm/pytorch/README.md diff --git a/recommendation/dlrm/pytorch/VERSION_NUMBER b/retired_benchmarks/recommendation/dlrm/pytorch/VERSION_NUMBER similarity index 100% rename from recommendation/dlrm/pytorch/VERSION_NUMBER rename to retired_benchmarks/recommendation/dlrm/pytorch/VERSION_NUMBER diff --git a/recommendation/dlrm/pytorch/docker_cpu/Dockerfile b/retired_benchmarks/recommendation/dlrm/pytorch/docker_cpu/Dockerfile similarity index 100% rename from recommendation/dlrm/pytorch/docker_cpu/Dockerfile rename to retired_benchmarks/recommendation/dlrm/pytorch/docker_cpu/Dockerfile diff --git a/recommendation/dlrm/pytorch/docker_cpu/build_docker_cpu.sh b/retired_benchmarks/recommendation/dlrm/pytorch/docker_cpu/build_docker_cpu.sh similarity index 100% rename from recommendation/dlrm/pytorch/docker_cpu/build_docker_cpu.sh rename to retired_benchmarks/recommendation/dlrm/pytorch/docker_cpu/build_docker_cpu.sh diff --git a/recommendation/dlrm/pytorch/docker_cpu/run_docker_cpu.sh b/retired_benchmarks/recommendation/dlrm/pytorch/docker_cpu/run_docker_cpu.sh similarity index 100% rename from recommendation/dlrm/pytorch/docker_cpu/run_docker_cpu.sh rename to retired_benchmarks/recommendation/dlrm/pytorch/docker_cpu/run_docker_cpu.sh diff --git a/recommendation/dlrm/pytorch/docker_gpu/Dockerfile b/retired_benchmarks/recommendation/dlrm/pytorch/docker_gpu/Dockerfile similarity index 100% rename from recommendation/dlrm/pytorch/docker_gpu/Dockerfile rename to retired_benchmarks/recommendation/dlrm/pytorch/docker_gpu/Dockerfile diff --git a/recommendation/dlrm/pytorch/docker_gpu/build_docker_gpu.sh b/retired_benchmarks/recommendation/dlrm/pytorch/docker_gpu/build_docker_gpu.sh similarity index 100% rename from recommendation/dlrm/pytorch/docker_gpu/build_docker_gpu.sh rename to retired_benchmarks/recommendation/dlrm/pytorch/docker_gpu/build_docker_gpu.sh diff --git a/recommendation/dlrm/pytorch/docker_gpu/run_docker_gpu.sh b/retired_benchmarks/recommendation/dlrm/pytorch/docker_gpu/run_docker_gpu.sh similarity index 100% rename from recommendation/dlrm/pytorch/docker_gpu/run_docker_gpu.sh rename to 
retired_benchmarks/recommendation/dlrm/pytorch/docker_gpu/run_docker_gpu.sh diff --git a/recommendation/dlrm/pytorch/python/__init__.py b/retired_benchmarks/recommendation/dlrm/pytorch/python/__init__.py old mode 100755 new mode 100644 similarity index 100% rename from recommendation/dlrm/pytorch/python/__init__.py rename to retired_benchmarks/recommendation/dlrm/pytorch/python/__init__.py diff --git a/recommendation/dlrm/pytorch/python/backend.py b/retired_benchmarks/recommendation/dlrm/pytorch/python/backend.py similarity index 100% rename from recommendation/dlrm/pytorch/python/backend.py rename to retired_benchmarks/recommendation/dlrm/pytorch/python/backend.py diff --git a/recommendation/dlrm/pytorch/python/backend_onnxruntime.py b/retired_benchmarks/recommendation/dlrm/pytorch/python/backend_onnxruntime.py similarity index 100% rename from recommendation/dlrm/pytorch/python/backend_onnxruntime.py rename to retired_benchmarks/recommendation/dlrm/pytorch/python/backend_onnxruntime.py diff --git a/recommendation/dlrm/pytorch/python/backend_pytorch_native.py b/retired_benchmarks/recommendation/dlrm/pytorch/python/backend_pytorch_native.py similarity index 100% rename from recommendation/dlrm/pytorch/python/backend_pytorch_native.py rename to retired_benchmarks/recommendation/dlrm/pytorch/python/backend_pytorch_native.py diff --git a/recommendation/dlrm/pytorch/python/backend_tf.py b/retired_benchmarks/recommendation/dlrm/pytorch/python/backend_tf.py similarity index 100% rename from recommendation/dlrm/pytorch/python/backend_tf.py rename to retired_benchmarks/recommendation/dlrm/pytorch/python/backend_tf.py diff --git a/recommendation/dlrm/pytorch/python/criteo.py b/retired_benchmarks/recommendation/dlrm/pytorch/python/criteo.py similarity index 100% rename from recommendation/dlrm/pytorch/python/criteo.py rename to retired_benchmarks/recommendation/dlrm/pytorch/python/criteo.py diff --git a/recommendation/dlrm/pytorch/python/dataset.py b/retired_benchmarks/recommendation/dlrm/pytorch/python/dataset.py similarity index 100% rename from recommendation/dlrm/pytorch/python/dataset.py rename to retired_benchmarks/recommendation/dlrm/pytorch/python/dataset.py diff --git a/recommendation/dlrm/pytorch/python/main.py b/retired_benchmarks/recommendation/dlrm/pytorch/python/main.py similarity index 100% rename from recommendation/dlrm/pytorch/python/main.py rename to retired_benchmarks/recommendation/dlrm/pytorch/python/main.py diff --git a/recommendation/dlrm/pytorch/python/tf_dlrm.py b/retired_benchmarks/recommendation/dlrm/pytorch/python/tf_dlrm.py similarity index 100% rename from recommendation/dlrm/pytorch/python/tf_dlrm.py rename to retired_benchmarks/recommendation/dlrm/pytorch/python/tf_dlrm.py diff --git a/recommendation/dlrm/pytorch/python/version.py b/retired_benchmarks/recommendation/dlrm/pytorch/python/version.py similarity index 100% rename from recommendation/dlrm/pytorch/python/version.py rename to retired_benchmarks/recommendation/dlrm/pytorch/python/version.py diff --git a/recommendation/dlrm/pytorch/run_and_time.sh b/retired_benchmarks/recommendation/dlrm/pytorch/run_and_time.sh similarity index 100% rename from recommendation/dlrm/pytorch/run_and_time.sh rename to retired_benchmarks/recommendation/dlrm/pytorch/run_and_time.sh diff --git a/recommendation/dlrm/pytorch/run_common.sh b/retired_benchmarks/recommendation/dlrm/pytorch/run_common.sh similarity index 100% rename from recommendation/dlrm/pytorch/run_common.sh rename to 
retired_benchmarks/recommendation/dlrm/pytorch/run_common.sh diff --git a/recommendation/dlrm/pytorch/run_helper.sh b/retired_benchmarks/recommendation/dlrm/pytorch/run_helper.sh similarity index 100% rename from recommendation/dlrm/pytorch/run_helper.sh rename to retired_benchmarks/recommendation/dlrm/pytorch/run_helper.sh diff --git a/recommendation/dlrm/pytorch/run_local.sh b/retired_benchmarks/recommendation/dlrm/pytorch/run_local.sh similarity index 100% rename from recommendation/dlrm/pytorch/run_local.sh rename to retired_benchmarks/recommendation/dlrm/pytorch/run_local.sh diff --git a/recommendation/dlrm/pytorch/setup.py b/retired_benchmarks/recommendation/dlrm/pytorch/setup.py similarity index 100% rename from recommendation/dlrm/pytorch/setup.py rename to retired_benchmarks/recommendation/dlrm/pytorch/setup.py diff --git a/recommendation/dlrm/pytorch/tools/accuracy-dlrm.py b/retired_benchmarks/recommendation/dlrm/pytorch/tools/accuracy-dlrm.py similarity index 100% rename from recommendation/dlrm/pytorch/tools/accuracy-dlrm.py rename to retired_benchmarks/recommendation/dlrm/pytorch/tools/accuracy-dlrm.py diff --git a/recommendation/dlrm/pytorch/tools/dist_quantile.txt b/retired_benchmarks/recommendation/dlrm/pytorch/tools/dist_quantile.txt similarity index 100% rename from recommendation/dlrm/pytorch/tools/dist_quantile.txt rename to retired_benchmarks/recommendation/dlrm/pytorch/tools/dist_quantile.txt diff --git a/recommendation/dlrm/pytorch/tools/dist_trace_verification.txt b/retired_benchmarks/recommendation/dlrm/pytorch/tools/dist_trace_verification.txt similarity index 100% rename from recommendation/dlrm/pytorch/tools/dist_trace_verification.txt rename to retired_benchmarks/recommendation/dlrm/pytorch/tools/dist_trace_verification.txt diff --git a/recommendation/dlrm/pytorch/tools/make_fake_criteo.sh b/retired_benchmarks/recommendation/dlrm/pytorch/tools/make_fake_criteo.sh similarity index 100% rename from recommendation/dlrm/pytorch/tools/make_fake_criteo.sh rename to retired_benchmarks/recommendation/dlrm/pytorch/tools/make_fake_criteo.sh diff --git a/recommendation/dlrm/pytorch/tools/quickgen.py b/retired_benchmarks/recommendation/dlrm/pytorch/tools/quickgen.py similarity index 100% rename from recommendation/dlrm/pytorch/tools/quickgen.py rename to retired_benchmarks/recommendation/dlrm/pytorch/tools/quickgen.py diff --git a/recommendation/dlrm/pytorch/user.conf b/retired_benchmarks/recommendation/dlrm/pytorch/user.conf similarity index 100% rename from recommendation/dlrm/pytorch/user.conf rename to retired_benchmarks/recommendation/dlrm/pytorch/user.conf diff --git a/recommendation/dlrm/tf/README.md b/retired_benchmarks/recommendation/dlrm/tf/README.md similarity index 100% rename from recommendation/dlrm/tf/README.md rename to retired_benchmarks/recommendation/dlrm/tf/README.md diff --git a/recommendation/dlrm/tf/__init__.py b/retired_benchmarks/recommendation/dlrm/tf/__init__.py similarity index 100% rename from recommendation/dlrm/tf/__init__.py rename to retired_benchmarks/recommendation/dlrm/tf/__init__.py diff --git a/recommendation/dlrm/tf/common.py b/retired_benchmarks/recommendation/dlrm/tf/common.py similarity index 100% rename from recommendation/dlrm/tf/common.py rename to retired_benchmarks/recommendation/dlrm/tf/common.py diff --git a/recommendation/dlrm/tf/dataloader.py b/retired_benchmarks/recommendation/dlrm/tf/dataloader.py similarity index 100% rename from recommendation/dlrm/tf/dataloader.py rename to 
retired_benchmarks/recommendation/dlrm/tf/dataloader.py diff --git a/recommendation/dlrm/tf/dlrm.py b/retired_benchmarks/recommendation/dlrm/tf/dlrm.py similarity index 100% rename from recommendation/dlrm/tf/dlrm.py rename to retired_benchmarks/recommendation/dlrm/tf/dlrm.py diff --git a/recommendation/dlrm/tf/dlrm_embedding_runner.py b/retired_benchmarks/recommendation/dlrm/tf/dlrm_embedding_runner.py similarity index 100% rename from recommendation/dlrm/tf/dlrm_embedding_runner.py rename to retired_benchmarks/recommendation/dlrm/tf/dlrm_embedding_runner.py diff --git a/recommendation/dlrm/tf/dlrm_main.py b/retired_benchmarks/recommendation/dlrm/tf/dlrm_main.py similarity index 100% rename from recommendation/dlrm/tf/dlrm_main.py rename to retired_benchmarks/recommendation/dlrm/tf/dlrm_main.py diff --git a/recommendation/dlrm/tf/feature_config.py b/retired_benchmarks/recommendation/dlrm/tf/feature_config.py similarity index 100% rename from recommendation/dlrm/tf/feature_config.py rename to retired_benchmarks/recommendation/dlrm/tf/feature_config.py diff --git a/recommendation/dlrm/tf/mlp_log.py b/retired_benchmarks/recommendation/dlrm/tf/mlp_log.py similarity index 100% rename from recommendation/dlrm/tf/mlp_log.py rename to retired_benchmarks/recommendation/dlrm/tf/mlp_log.py diff --git a/recommendation/dlrm/tf/roc_metrics/python/roc_metrics.clif b/retired_benchmarks/recommendation/dlrm/tf/roc_metrics/python/roc_metrics.clif similarity index 100% rename from recommendation/dlrm/tf/roc_metrics/python/roc_metrics.clif rename to retired_benchmarks/recommendation/dlrm/tf/roc_metrics/python/roc_metrics.clif diff --git a/recommendation/dlrm/tf/roc_metrics/roc_metrics.cc b/retired_benchmarks/recommendation/dlrm/tf/roc_metrics/roc_metrics.cc similarity index 100% rename from recommendation/dlrm/tf/roc_metrics/roc_metrics.cc rename to retired_benchmarks/recommendation/dlrm/tf/roc_metrics/roc_metrics.cc diff --git a/recommendation/dlrm/tf/roc_metrics/roc_metrics.h b/retired_benchmarks/recommendation/dlrm/tf/roc_metrics/roc_metrics.h similarity index 100% rename from recommendation/dlrm/tf/roc_metrics/roc_metrics.h rename to retired_benchmarks/recommendation/dlrm/tf/roc_metrics/roc_metrics.h diff --git a/recommendation/dlrm/tf/train_and_eval_runner.py b/retired_benchmarks/recommendation/dlrm/tf/train_and_eval_runner.py similarity index 100% rename from recommendation/dlrm/tf/train_and_eval_runner.py rename to retired_benchmarks/recommendation/dlrm/tf/train_and_eval_runner.py diff --git a/recommendation/dlrm/tf/utils.py b/retired_benchmarks/recommendation/dlrm/tf/utils.py similarity index 100% rename from recommendation/dlrm/tf/utils.py rename to retired_benchmarks/recommendation/dlrm/tf/utils.py diff --git a/speech_recognition/rnnt/QSL.py b/retired_benchmarks/speech_recognition/rnnt/QSL.py similarity index 100% rename from speech_recognition/rnnt/QSL.py rename to retired_benchmarks/speech_recognition/rnnt/QSL.py diff --git a/speech_recognition/rnnt/README.md b/retired_benchmarks/speech_recognition/rnnt/README.md similarity index 100% rename from speech_recognition/rnnt/README.md rename to retired_benchmarks/speech_recognition/rnnt/README.md diff --git a/speech_recognition/rnnt/README_cm.md b/retired_benchmarks/speech_recognition/rnnt/README_cm.md similarity index 100% rename from speech_recognition/rnnt/README_cm.md rename to retired_benchmarks/speech_recognition/rnnt/README_cm.md diff --git a/speech_recognition/rnnt/accuracy_eval.py b/retired_benchmarks/speech_recognition/rnnt/accuracy_eval.py 
similarity index 100% rename from speech_recognition/rnnt/accuracy_eval.py rename to retired_benchmarks/speech_recognition/rnnt/accuracy_eval.py diff --git a/speech_recognition/rnnt/dev-clean-wav.json b/retired_benchmarks/speech_recognition/rnnt/dev-clean-wav.json similarity index 100% rename from speech_recognition/rnnt/dev-clean-wav.json rename to retired_benchmarks/speech_recognition/rnnt/dev-clean-wav.json diff --git a/speech_recognition/rnnt/environment.yml b/retired_benchmarks/speech_recognition/rnnt/environment.yml similarity index 100% rename from speech_recognition/rnnt/environment.yml rename to retired_benchmarks/speech_recognition/rnnt/environment.yml diff --git a/speech_recognition/rnnt/optional_harness_ck/README.md b/retired_benchmarks/speech_recognition/rnnt/optional_harness_ck/README.md similarity index 100% rename from speech_recognition/rnnt/optional_harness_ck/README.md rename to retired_benchmarks/speech_recognition/rnnt/optional_harness_ck/README.md diff --git a/speech_recognition/rnnt/pytorch/Dockerfile b/retired_benchmarks/speech_recognition/rnnt/pytorch/Dockerfile similarity index 100% rename from speech_recognition/rnnt/pytorch/Dockerfile rename to retired_benchmarks/speech_recognition/rnnt/pytorch/Dockerfile diff --git a/speech_recognition/rnnt/pytorch/LICENSE b/retired_benchmarks/speech_recognition/rnnt/pytorch/LICENSE similarity index 100% rename from speech_recognition/rnnt/pytorch/LICENSE rename to retired_benchmarks/speech_recognition/rnnt/pytorch/LICENSE diff --git a/speech_recognition/rnnt/pytorch/NOTICE b/retired_benchmarks/speech_recognition/rnnt/pytorch/NOTICE similarity index 100% rename from speech_recognition/rnnt/pytorch/NOTICE rename to retired_benchmarks/speech_recognition/rnnt/pytorch/NOTICE diff --git a/speech_recognition/rnnt/pytorch/configs/rnnt.toml b/retired_benchmarks/speech_recognition/rnnt/pytorch/configs/rnnt.toml similarity index 100% rename from speech_recognition/rnnt/pytorch/configs/rnnt.toml rename to retired_benchmarks/speech_recognition/rnnt/pytorch/configs/rnnt.toml diff --git a/speech_recognition/rnnt/pytorch/dataset.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/dataset.py similarity index 100% rename from speech_recognition/rnnt/pytorch/dataset.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/dataset.py diff --git a/speech_recognition/rnnt/pytorch/decoders.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/decoders.py similarity index 100% rename from speech_recognition/rnnt/pytorch/decoders.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/decoders.py diff --git a/speech_recognition/rnnt/pytorch/helpers.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/helpers.py similarity index 100% rename from speech_recognition/rnnt/pytorch/helpers.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/helpers.py diff --git a/speech_recognition/rnnt/pytorch/metrics.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/metrics.py similarity index 100% rename from speech_recognition/rnnt/pytorch/metrics.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/metrics.py diff --git a/speech_recognition/rnnt/pytorch/model_separable_rnnt.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/model_separable_rnnt.py similarity index 100% rename from speech_recognition/rnnt/pytorch/model_separable_rnnt.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/model_separable_rnnt.py diff --git a/speech_recognition/rnnt/pytorch/parts/features.py 
b/retired_benchmarks/speech_recognition/rnnt/pytorch/parts/features.py similarity index 100% rename from speech_recognition/rnnt/pytorch/parts/features.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/parts/features.py diff --git a/speech_recognition/rnnt/pytorch/parts/manifest.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/parts/manifest.py similarity index 100% rename from speech_recognition/rnnt/pytorch/parts/manifest.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/parts/manifest.py diff --git a/speech_recognition/rnnt/pytorch/parts/segment.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/parts/segment.py similarity index 100% rename from speech_recognition/rnnt/pytorch/parts/segment.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/parts/segment.py diff --git a/speech_recognition/rnnt/pytorch/parts/text/LICENSE b/retired_benchmarks/speech_recognition/rnnt/pytorch/parts/text/LICENSE similarity index 100% rename from speech_recognition/rnnt/pytorch/parts/text/LICENSE rename to retired_benchmarks/speech_recognition/rnnt/pytorch/parts/text/LICENSE diff --git a/speech_recognition/rnnt/pytorch/parts/text/__init__.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/parts/text/__init__.py similarity index 100% rename from speech_recognition/rnnt/pytorch/parts/text/__init__.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/parts/text/__init__.py diff --git a/speech_recognition/rnnt/pytorch/parts/text/cleaners.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/parts/text/cleaners.py similarity index 100% rename from speech_recognition/rnnt/pytorch/parts/text/cleaners.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/parts/text/cleaners.py diff --git a/speech_recognition/rnnt/pytorch/parts/text/numbers.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/parts/text/numbers.py similarity index 100% rename from speech_recognition/rnnt/pytorch/parts/text/numbers.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/parts/text/numbers.py diff --git a/speech_recognition/rnnt/pytorch/preprocessing.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/preprocessing.py similarity index 100% rename from speech_recognition/rnnt/pytorch/preprocessing.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/preprocessing.py diff --git a/speech_recognition/rnnt/pytorch/rnn.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/rnn.py similarity index 100% rename from speech_recognition/rnnt/pytorch/rnn.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/rnn.py diff --git a/speech_recognition/rnnt/pytorch/scripts/docker/build.sh b/retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/docker/build.sh similarity index 100% rename from speech_recognition/rnnt/pytorch/scripts/docker/build.sh rename to retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/docker/build.sh diff --git a/speech_recognition/rnnt/pytorch/scripts/docker/launch.sh b/retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/docker/launch.sh similarity index 100% rename from speech_recognition/rnnt/pytorch/scripts/docker/launch.sh rename to retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/docker/launch.sh diff --git a/speech_recognition/rnnt/pytorch/scripts/download_librispeech.sh b/retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/download_librispeech.sh similarity index 100% rename from speech_recognition/rnnt/pytorch/scripts/download_librispeech.sh rename to 
retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/download_librispeech.sh diff --git a/speech_recognition/rnnt/pytorch/scripts/evaluation.sh b/retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/evaluation.sh similarity index 100% rename from speech_recognition/rnnt/pytorch/scripts/evaluation.sh rename to retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/evaluation.sh diff --git a/speech_recognition/rnnt/pytorch/scripts/inference.sh b/retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/inference.sh similarity index 100% rename from speech_recognition/rnnt/pytorch/scripts/inference.sh rename to retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/inference.sh diff --git a/speech_recognition/rnnt/pytorch/scripts/inference_benchmark.sh b/retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/inference_benchmark.sh similarity index 100% rename from speech_recognition/rnnt/pytorch/scripts/inference_benchmark.sh rename to retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/inference_benchmark.sh diff --git a/speech_recognition/rnnt/pytorch/scripts/preprocess_librispeech.sh b/retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/preprocess_librispeech.sh similarity index 100% rename from speech_recognition/rnnt/pytorch/scripts/preprocess_librispeech.sh rename to retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/preprocess_librispeech.sh diff --git a/speech_recognition/rnnt/pytorch/scripts/train.sh b/retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/train.sh similarity index 100% rename from speech_recognition/rnnt/pytorch/scripts/train.sh rename to retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/train.sh diff --git a/speech_recognition/rnnt/pytorch/scripts/train_benchmark.sh b/retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/train_benchmark.sh similarity index 100% rename from speech_recognition/rnnt/pytorch/scripts/train_benchmark.sh rename to retired_benchmarks/speech_recognition/rnnt/pytorch/scripts/train_benchmark.sh diff --git a/speech_recognition/rnnt/pytorch/utils/__init__.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/utils/__init__.py similarity index 100% rename from speech_recognition/rnnt/pytorch/utils/__init__.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/utils/__init__.py diff --git a/speech_recognition/rnnt/pytorch/utils/convert_librispeech.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/utils/convert_librispeech.py similarity index 100% rename from speech_recognition/rnnt/pytorch/utils/convert_librispeech.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/utils/convert_librispeech.py diff --git a/speech_recognition/rnnt/pytorch/utils/download_librispeech.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/utils/download_librispeech.py similarity index 100% rename from speech_recognition/rnnt/pytorch/utils/download_librispeech.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/utils/download_librispeech.py diff --git a/speech_recognition/rnnt/pytorch/utils/download_utils.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/utils/download_utils.py similarity index 100% rename from speech_recognition/rnnt/pytorch/utils/download_utils.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/utils/download_utils.py diff --git a/speech_recognition/rnnt/pytorch/utils/inference_librispeech.csv b/retired_benchmarks/speech_recognition/rnnt/pytorch/utils/inference_librispeech.csv similarity index 100% rename from 
speech_recognition/rnnt/pytorch/utils/inference_librispeech.csv rename to retired_benchmarks/speech_recognition/rnnt/pytorch/utils/inference_librispeech.csv diff --git a/speech_recognition/rnnt/pytorch/utils/librispeech-inference.csv b/retired_benchmarks/speech_recognition/rnnt/pytorch/utils/librispeech-inference.csv similarity index 100% rename from speech_recognition/rnnt/pytorch/utils/librispeech-inference.csv rename to retired_benchmarks/speech_recognition/rnnt/pytorch/utils/librispeech-inference.csv diff --git a/speech_recognition/rnnt/pytorch/utils/librispeech.csv b/retired_benchmarks/speech_recognition/rnnt/pytorch/utils/librispeech.csv similarity index 100% rename from speech_recognition/rnnt/pytorch/utils/librispeech.csv rename to retired_benchmarks/speech_recognition/rnnt/pytorch/utils/librispeech.csv diff --git a/speech_recognition/rnnt/pytorch/utils/preprocessing_utils.py b/retired_benchmarks/speech_recognition/rnnt/pytorch/utils/preprocessing_utils.py similarity index 100% rename from speech_recognition/rnnt/pytorch/utils/preprocessing_utils.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch/utils/preprocessing_utils.py diff --git a/speech_recognition/rnnt/pytorch_SUT.py b/retired_benchmarks/speech_recognition/rnnt/pytorch_SUT.py similarity index 100% rename from speech_recognition/rnnt/pytorch_SUT.py rename to retired_benchmarks/speech_recognition/rnnt/pytorch_SUT.py diff --git a/speech_recognition/rnnt/run.py b/retired_benchmarks/speech_recognition/rnnt/run.py similarity index 100% rename from speech_recognition/rnnt/run.py rename to retired_benchmarks/speech_recognition/rnnt/run.py diff --git a/speech_recognition/rnnt/run.sh b/retired_benchmarks/speech_recognition/rnnt/run.sh similarity index 100% rename from speech_recognition/rnnt/run.sh rename to retired_benchmarks/speech_recognition/rnnt/run.sh diff --git a/speech_recognition/rnnt/user.conf b/retired_benchmarks/speech_recognition/rnnt/user.conf similarity index 100% rename from speech_recognition/rnnt/user.conf rename to retired_benchmarks/speech_recognition/rnnt/user.conf diff --git a/translation/gnmt/.gitignore b/retired_benchmarks/translation/gnmt/.gitignore similarity index 100% rename from translation/gnmt/.gitignore rename to retired_benchmarks/translation/gnmt/.gitignore diff --git a/translation/gnmt/README.md b/retired_benchmarks/translation/gnmt/README.md similarity index 100% rename from translation/gnmt/README.md rename to retired_benchmarks/translation/gnmt/README.md diff --git a/translation/gnmt/mlcube/mlcube.yaml b/retired_benchmarks/translation/gnmt/mlcube/mlcube.yaml similarity index 100% rename from translation/gnmt/mlcube/mlcube.yaml rename to retired_benchmarks/translation/gnmt/mlcube/mlcube.yaml diff --git a/translation/gnmt/mlcube/workspace/parameters.yaml b/retired_benchmarks/translation/gnmt/mlcube/workspace/parameters.yaml similarity index 100% rename from translation/gnmt/mlcube/workspace/parameters.yaml rename to retired_benchmarks/translation/gnmt/mlcube/workspace/parameters.yaml diff --git a/translation/gnmt/tensorflow/Dockerfile b/retired_benchmarks/translation/gnmt/tensorflow/Dockerfile similarity index 100% rename from translation/gnmt/tensorflow/Dockerfile rename to retired_benchmarks/translation/gnmt/tensorflow/Dockerfile diff --git a/translation/gnmt/tensorflow/README.md b/retired_benchmarks/translation/gnmt/tensorflow/README.md similarity index 100% rename from translation/gnmt/tensorflow/README.md rename to retired_benchmarks/translation/gnmt/tensorflow/README.md diff 
--git a/translation/gnmt/tensorflow/download_dataset.sh b/retired_benchmarks/translation/gnmt/tensorflow/download_dataset.sh similarity index 100% rename from translation/gnmt/tensorflow/download_dataset.sh rename to retired_benchmarks/translation/gnmt/tensorflow/download_dataset.sh diff --git a/translation/gnmt/tensorflow/download_trained_model.sh b/retired_benchmarks/translation/gnmt/tensorflow/download_trained_model.sh similarity index 100% rename from translation/gnmt/tensorflow/download_trained_model.sh rename to retired_benchmarks/translation/gnmt/tensorflow/download_trained_model.sh diff --git a/translation/gnmt/tensorflow/generic_loadgen.py b/retired_benchmarks/translation/gnmt/tensorflow/generic_loadgen.py similarity index 100% rename from translation/gnmt/tensorflow/generic_loadgen.py rename to retired_benchmarks/translation/gnmt/tensorflow/generic_loadgen.py diff --git a/translation/gnmt/tensorflow/loadgen_gnmt.py b/retired_benchmarks/translation/gnmt/tensorflow/loadgen_gnmt.py similarity index 100% rename from translation/gnmt/tensorflow/loadgen_gnmt.py rename to retired_benchmarks/translation/gnmt/tensorflow/loadgen_gnmt.py diff --git a/translation/gnmt/tensorflow/mlcube.py b/retired_benchmarks/translation/gnmt/tensorflow/mlcube.py similarity index 100% rename from translation/gnmt/tensorflow/mlcube.py rename to retired_benchmarks/translation/gnmt/tensorflow/mlcube.py diff --git a/translation/gnmt/tensorflow/nmt/__init__.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/__init__.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/__init__.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/__init__.py diff --git a/translation/gnmt/tensorflow/nmt/attention_model.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/attention_model.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/attention_model.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/attention_model.py diff --git a/translation/gnmt/tensorflow/nmt/gnmt_model.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/gnmt_model.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/gnmt_model.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/gnmt_model.py diff --git a/translation/gnmt/tensorflow/nmt/inference.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/inference.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/inference.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/inference.py diff --git a/translation/gnmt/tensorflow/nmt/inference_test.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/inference_test.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/inference_test.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/inference_test.py diff --git a/translation/gnmt/tensorflow/nmt/model.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/model.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/model.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/model.py diff --git a/translation/gnmt/tensorflow/nmt/model_helper.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/model_helper.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/model_helper.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/model_helper.py diff --git a/translation/gnmt/tensorflow/nmt/model_test.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/model_test.py similarity index 100% rename from 
translation/gnmt/tensorflow/nmt/model_test.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/model_test.py diff --git a/translation/gnmt/tensorflow/nmt/nmt.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/nmt.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/nmt.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/nmt.py diff --git a/translation/gnmt/tensorflow/nmt/nmt_test.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/nmt_test.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/nmt_test.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/nmt_test.py diff --git a/translation/gnmt/tensorflow/nmt/scripts/__init__.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/scripts/__init__.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/scripts/__init__.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/scripts/__init__.py diff --git a/translation/gnmt/tensorflow/nmt/scripts/bleu.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/scripts/bleu.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/scripts/bleu.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/scripts/bleu.py diff --git a/translation/gnmt/tensorflow/nmt/scripts/rouge.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/scripts/rouge.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/scripts/rouge.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/scripts/rouge.py diff --git a/translation/gnmt/tensorflow/nmt/standard_hparams/wmt16_gnmt_4_layer.json b/retired_benchmarks/translation/gnmt/tensorflow/nmt/standard_hparams/wmt16_gnmt_4_layer.json similarity index 100% rename from translation/gnmt/tensorflow/nmt/standard_hparams/wmt16_gnmt_4_layer.json rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/standard_hparams/wmt16_gnmt_4_layer.json diff --git a/translation/gnmt/tensorflow/nmt/train.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/train.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/train.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/train.py diff --git a/translation/gnmt/tensorflow/nmt/utils/__init__.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/__init__.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/__init__.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/__init__.py diff --git a/translation/gnmt/tensorflow/nmt/utils/common_test_utils.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/common_test_utils.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/common_test_utils.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/common_test_utils.py diff --git a/translation/gnmt/tensorflow/nmt/utils/evaluation_utils.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/evaluation_utils.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/evaluation_utils.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/evaluation_utils.py diff --git a/translation/gnmt/tensorflow/nmt/utils/evaluation_utils_test.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/evaluation_utils_test.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/evaluation_utils_test.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/evaluation_utils_test.py diff --git 
a/translation/gnmt/tensorflow/nmt/utils/iterator_utils.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/iterator_utils.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/iterator_utils.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/iterator_utils.py diff --git a/translation/gnmt/tensorflow/nmt/utils/iterator_utils_test.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/iterator_utils_test.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/iterator_utils_test.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/iterator_utils_test.py diff --git a/translation/gnmt/tensorflow/nmt/utils/misc_utils.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/misc_utils.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/misc_utils.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/misc_utils.py diff --git a/translation/gnmt/tensorflow/nmt/utils/misc_utils_test.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/misc_utils_test.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/misc_utils_test.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/misc_utils_test.py diff --git a/translation/gnmt/tensorflow/nmt/utils/nmt_utils.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/nmt_utils.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/nmt_utils.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/nmt_utils.py diff --git a/translation/gnmt/tensorflow/nmt/utils/standard_hparams_utils.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/standard_hparams_utils.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/standard_hparams_utils.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/standard_hparams_utils.py diff --git a/translation/gnmt/tensorflow/nmt/utils/vocab_utils.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/vocab_utils.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/vocab_utils.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/vocab_utils.py diff --git a/translation/gnmt/tensorflow/nmt/utils/vocab_utils_test.py b/retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/vocab_utils_test.py similarity index 100% rename from translation/gnmt/tensorflow/nmt/utils/vocab_utils_test.py rename to retired_benchmarks/translation/gnmt/tensorflow/nmt/utils/vocab_utils_test.py diff --git a/translation/gnmt/tensorflow/preprocess_input.sh b/retired_benchmarks/translation/gnmt/tensorflow/preprocess_input.sh similarity index 100% rename from translation/gnmt/tensorflow/preprocess_input.sh rename to retired_benchmarks/translation/gnmt/tensorflow/preprocess_input.sh diff --git a/translation/gnmt/tensorflow/process_accuracy.py b/retired_benchmarks/translation/gnmt/tensorflow/process_accuracy.py similarity index 100% rename from translation/gnmt/tensorflow/process_accuracy.py rename to retired_benchmarks/translation/gnmt/tensorflow/process_accuracy.py diff --git a/translation/gnmt/tensorflow/requirements.txt b/retired_benchmarks/translation/gnmt/tensorflow/requirements.txt similarity index 100% rename from translation/gnmt/tensorflow/requirements.txt rename to retired_benchmarks/translation/gnmt/tensorflow/requirements.txt diff --git a/translation/gnmt/tensorflow/run.sh b/retired_benchmarks/translation/gnmt/tensorflow/run.sh similarity index 100% 
rename from translation/gnmt/tensorflow/run.sh rename to retired_benchmarks/translation/gnmt/tensorflow/run.sh diff --git a/translation/gnmt/tensorflow/run_task.py b/retired_benchmarks/translation/gnmt/tensorflow/run_task.py similarity index 100% rename from translation/gnmt/tensorflow/run_task.py rename to retired_benchmarks/translation/gnmt/tensorflow/run_task.py diff --git a/translation/gnmt/tensorflow/train_gnmt.txt b/retired_benchmarks/translation/gnmt/tensorflow/train_gnmt.txt similarity index 100% rename from translation/gnmt/tensorflow/train_gnmt.txt rename to retired_benchmarks/translation/gnmt/tensorflow/train_gnmt.txt diff --git a/translation/gnmt/tensorflow/verify_dataset.sh b/retired_benchmarks/translation/gnmt/tensorflow/verify_dataset.sh similarity index 100% rename from translation/gnmt/tensorflow/verify_dataset.sh rename to retired_benchmarks/translation/gnmt/tensorflow/verify_dataset.sh diff --git a/retired_benchmarks/vision/classification_and_detection/.dockerignore b/retired_benchmarks/vision/classification_and_detection/.dockerignore new file mode 100644 index 000000000..91916221c --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/.dockerignore @@ -0,0 +1,3 @@ +preprocessed +python/preprocessed +mlperf_* diff --git a/retired_benchmarks/vision/classification_and_detection/.gitignore b/retired_benchmarks/vision/classification_and_detection/.gitignore new file mode 100755 index 000000000..0ab826da3 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/.gitignore @@ -0,0 +1,19 @@ +.coverage* +*.pyc +.idea +build +dist +bin +obj +.ipynb_checkpoints +__pycache__ +*.pyc +*.swp +.cache +.eggs +*.egg-info +run.sh +preprocessed +python/preprocessed +output +mlperf_log_* diff --git a/retired_benchmarks/vision/classification_and_detection/Dockerfile.cpu b/retired_benchmarks/vision/classification_and_detection/Dockerfile.cpu new file mode 100755 index 000000000..b435c6143 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/Dockerfile.cpu @@ -0,0 +1,44 @@ +FROM ubuntu:16.04 + +ENV PYTHON_VERSION=3.7 +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 +ENV PATH /opt/anaconda3/bin:$PATH + +WORKDIR /root +ENV HOME /root + +RUN apt-get update + +RUN apt-get install -y --no-install-recommends \ + git \ + build-essential \ + software-properties-common \ + ca-certificates \ + wget \ + curl \ + htop \ + zip \ + unzip + +RUN cd /opt && \ + wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O miniconda.sh && \ + /bin/bash ./miniconda.sh -b -p /opt/anaconda3 && \ + rm miniconda.sh && \ + /opt/anaconda3/bin/conda clean -tipsy && \ + ln -s /opt/anaconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + echo ". 
/opt/anaconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "conda activate base" >> ~/.bashrc && \ + conda config --set always_yes yes --set changeps1 no + +RUN conda install pytorch-cpu torchvision-cpu -c pytorch +RUN pip install future pillow onnx opencv-python-headless tensorflow onnxruntime +RUN pip install Cython && pip install pycocotools +RUN cd /tmp && \ + git clone --recursive https://github.com/mlcommons/inference && \ + cd inference/loadgen && \ + pip install pybind11 && \ + CFLAGS="-std=c++14" python setup.py install && \ + rm -rf mlperf + +ENTRYPOINT ["/bin/bash"] diff --git a/retired_benchmarks/vision/classification_and_detection/Dockerfile.gpu b/retired_benchmarks/vision/classification_and_detection/Dockerfile.gpu new file mode 100755 index 000000000..5ba2c680c --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/Dockerfile.gpu @@ -0,0 +1,55 @@ +FROM nvidia/cuda:11.1-cudnn8-devel-ubuntu16.04 + +ENV PYTHON_VERSION=3.7 +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 +ENV PATH /opt/anaconda3/bin:$PATH + +WORKDIR /root +ENV HOME /root + +RUN apt-get update + +RUN apt-get install -y --no-install-recommends \ + git \ + build-essential \ + software-properties-common \ + ca-certificates \ + wget \ + curl \ + htop \ + zip \ + unzip + +RUN cd /opt && \ + wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O miniconda.sh && \ + /bin/bash ./miniconda.sh -b -p /opt/anaconda3 && \ + rm miniconda.sh && \ + /opt/anaconda3/bin/conda clean -tipsy && \ + ln -s /opt/anaconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ + echo ". /opt/anaconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \ + echo "conda activate base" >> ~/.bashrc && \ + conda config --set always_yes yes --set changeps1 no + +RUN pip install --upgrade pip && \ + pip install cython future pillow onnx opencv-python-headless && \ + ln -s /usr/local/cuda/lib64 /usr/local/cuda/lib && \ + cp /usr/lib/x86_64-linux-gnu/libnccl* /usr/local/cuda/lib && \ + ldconfig + +RUN conda install pytorch torchvision -c pytorch +RUN pip install tensorflow onnxruntime-gpu +RUN pip install Cython && pip install pycocotools + + +RUN cd /tmp && \ + git clone --recursive https://github.com/mlcommons/inference && \ + cd inference/loadgen && \ + pip install pybind11 && \ + CFLAGS="-std=c++14" python setup.py install && \ + rm -rf mlperf + +RUN echo "/usr/local/cuda/compat" >> /etc/ld.so.conf.d/cuda-10-0.conf && \ + ldconfig + +ENTRYPOINT ["/bin/bash"] diff --git a/retired_benchmarks/vision/classification_and_detection/GettingStarted.ipynb b/retired_benchmarks/vision/classification_and_detection/GettingStarted.ipynb new file mode 100755 index 000000000..85419b693 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/GettingStarted.ipynb @@ -0,0 +1,413 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## A short tutorial how to use the mlperf inference reference benchmark" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We wrapped all inference models into a single benchmark app. The benchmark app will read the propper dataset, preprocesses it and interface with the backend. Traffic is generated by loadgen, which depending on the desired mode drives the desired traffic to the benchmark app. 
\n", + "\n", + "To run this notebook, pick a directory and clone the mlperf source tree:\n", + "```\n", + "cd /tmp\n", + "git clone https://github.com/mlperf/inference.git\n", + "cd inference/v0.5/classification_and_detection\n", + "jupyter notebook \n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "root = os.getcwd()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "running develop\n", + "running egg_info\n", + "writing mlperf_loadgen.egg-info/PKG-INFO\n", + "writing dependency_links to mlperf_loadgen.egg-info/dependency_links.txt\n", + "writing top-level names to mlperf_loadgen.egg-info/top_level.txt\n", + "reading manifest file 'mlperf_loadgen.egg-info/SOURCES.txt'\n", + "writing manifest file 'mlperf_loadgen.egg-info/SOURCES.txt'\n", + "running build_ext\n", + "building 'mlperf_loadgen' extension\n", + "gcc -pthread -B /opt/anaconda3/compiler_compat -Wl,--sysroot=/ -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -std=c++14 -fPIC -DMAJOR_VERSION=0 -DMINOR_VERSION=5 -I. -I../third_party/pybind/include -I/opt/anaconda3/include/python3.7m -c loadgen.cc -o build/temp.linux-x86_64-3.7/loadgen.o\n", + "\u001b[01m\u001b[Kcc1plus:\u001b[m\u001b[K \u001b[01;35m\u001b[Kwarning: \u001b[m\u001b[Kcommand line option ‘\u001b[01m\u001b[K-Wstrict-prototypes\u001b[m\u001b[K’ is valid for C/ObjC but not for C++\n", + "...\n", + "Using /opt/anaconda3/lib/python3.7/site-packages\n", + "Finished processing dependencies for mlperf-inference==0.1.0\n" + ] + } + ], + "source": [ + "!cd ../../loadgen; CFLAGS=\"-std=c++14\" python setup.py develop; cd {root}\n", + "!python setup.py develop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The benchmark app uses a shell script to simplify command line options and the user can pick backend, model and device:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "usage: ./run_local.sh tf|onnxruntime|pytorch|tflite [resnet50|mobilenet|ssd-mobilenet|ssd-resnet34|ssd-resnet34] [cpu|gpu]\r\n" + ] + } + ], + "source": [ + "!./run_local.sh" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Before running the benchmark, device on model and dataset and set the environment variable ```MODEL_DIR``` and ```DATA_DIR```. \n", + "\n", + "For this tutorial we use onnxruntime (tensorflow and pytorch will work as well), mobilenet and a fake imagetnet dataset with a few images." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: onnxruntime in /opt/anaconda3/lib/python3.7/site-packages (0.4.0)\r\n" + ] + } + ], + "source": [ + "!pip install onnxruntime" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 1 - download the model. You find the links to the models [here](https://github.com/mlperf/inference/tree/master/v0.5/classification_and_detection#supported-models)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "!wget -q https://zenodo.org/record/3157894/files/mobilenet_v1_1.0_224.onnx" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 2 - download the dataset. For this tutorial we create a small, fake dataset that pretends to be imagenet.\n", + "Normally you'd need to download imagenet2012/validation for image classification or coco2017/validation for object detection.\n", + "\n", + "Links and instructions on how to download the datasets can be found in the [README](https://github.com/mlperf/inference/tree/master/v0.5/classification_and_detection#datasets)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "!tools/make_fake_imagenet.sh" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 3 - tell the benchmark where to find model and data " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['MODEL_DIR'] = root\n", + "os.environ['DATA_DIR'] = os.path.join(root, \"fake_imagenet\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For an mlperf submission the number of queries, time, latencies and percentiles are given and we default to those settings. But for this tutorial we pass in some extra options to make things go quicker.\n", + "run_local.sh will look for the environment variable EXTRA_OPS and add this to the arguments. You can also add additional arguments in the command line.\n", + "The options below will limit the time that the benchmark runs to 10 seconds and add accuracy reporting." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "os.environ['EXTRA_OPS'] =\"--queries-offline 20 --time 10 --max-latency 0.2\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Step 4 - run the benchmark." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:main:Namespace(accuracy=True, backend='onnxruntime', cache=0, count=None, data_format=None, dataset='imagenet_mobilenet', dataset_list=None, dataset_path='/home/gs/inference/v0.5/classification_and_detection/fake_imagenet', inputs=None, max_batchsize=32, max_latency=[0.2], model='/home/gs/inference/v0.5/classification_and_detection/mobilenet_v1_1.0_224.onnx', output='/home/gs/inference/v0.5/classification_and_detection/output/mobilenet-onnxruntime-cpu/results.json', outputs=['MobilenetV1/Predictions/Reshape_1:0'], profile='mobilenet-onnxruntime', qps=10, queries_multi=24576, queries_offline=20, queries_single=1024, scenario=[TestScenario.SingleStream], threads=8, time=10)\n", + "INFO:imagenet:loaded 8 images, cache=0, took=0.2sec\n", + "INFO:main:starting TestScenario.SingleStream\n", + "TestScenario.SingleStream qps=69.75, mean=0.0115, time=0.11, acc=62.50, queries=8, tiles=50.0:0.0112,80.0:0.0115,90.0:0.0121,95.0:0.0129,99.0:0.0135,99.9:0.0137\n" + ] + } + ], + "source": [ + "!./run_local.sh onnxruntime mobilenet cpu --accuracy " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The line ```Accuracy``` reports accuracy or mAP together with some latencies in various percentiles so you can get insight into how this run went. 
Above accuracy was 87.5%.\n", + "\n", + "The line ```TestScenario.SingleStream-1.0``` reports the latency and qps seen during the benchmark.\n", + "\n", + "For submission the official logging is found in [mlperf_log_summary.txt](mlperf_log_summary.txt) and [mlperf_log_detail.txt](mlperf_log_detail.txt)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you read over the mlperf inference rules guide you'll find multiple scenarios to be run for the inference benchmarks:\n", + "\n", + "|scenario|description|\n", + "|:---|:---|\n", + "|SingleStream|The LoadGen sends the next query as soon as the SUT completes the previous one|\n", + "|MultiStream|The LoadGen sends a new query every Latency Constraint, if the SUT has completed the prior query. Otherwise, the new query is dropped. Such an event is one overtime query.|\n", + "|Server|The LoadGen sends new queries to the SUT according to a Poisson distribution. Overtime queries must not exceed 2x the latency bound.|\n", + "|Offline|The LoadGen sends all queries to the SUT at one time.|\n", + "\n", + "We can run those scenario using the ```--scenario``` option in the command line, for example:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "INFO:main:Namespace(accuracy=False, backend='onnxruntime', cache=0, count=None, data_format=None, dataset='imagenet_mobilenet', dataset_list=None, dataset_path='/home/gs/inference/v0.5/classification_and_detection/fake_imagenet', inputs=None, max_batchsize=32, max_latency=[0.2], model='/home/gs/inference/v0.5/classification_and_detection/mobilenet_v1_1.0_224.onnx', output='/home/gs/inference/v0.5/classification_and_detection/output/mobilenet-onnxruntime-cpu/results.json', outputs=['MobilenetV1/Predictions/Reshape_1:0'], profile='mobilenet-onnxruntime', qps=10, queries_multi=24576, queries_offline=20, queries_single=1024, scenario=[TestScenario.Offline], threads=8, time=10)\n", + "INFO:imagenet:loaded 8 images, cache=0, took=0.0sec\n", + "INFO:main:starting TestScenario.Offline\n", + "TestScenario.Offline qps=44.11, mean=2.3486, time=2.49, queries=110, tiles=50.0:2.4500,80.0:2.4687,90.0:2.4687,95.0:2.4687,99.0:2.4687,99.9:2.4687\n" + ] + } + ], + "source": [ + "!./run_local.sh onnxruntime mobilenet cpu --scenario Offline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Additional logfiles\n", + "\n", + "We log some additional information [here](output/mobilenet-onnxruntime-cpu/results.json) which can be used to plot graphs." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Under the hood\n", + "\n", + "In case you wonder what the run_local.sh does, it only assembles the command line for the python based benchmark app. 
Command line options for the app are documented [here](https://github.com/mlperf/inference/blob/master/cloud/image_classification)\n", + "\n", + "Calling\n", + "```\n", + "!bash -x ./run_local.sh onnxruntime mobilenet cpu --accuracy \n", + "```\n", + "will result in the following command line:\n", + "```\n", + "python python/main.py --profile mobilenet-onnxruntime --model /tmp/inference/cloud/image_classification/mobilenet_v1_1.0_224.onnx --dataset-path /tmp/inference/cloud/image_classification/fake_imagenet --output /tmp/inference/cloud/image_classification/output/mobilenet-onnxruntime-cpu/results.json --queries-offline 20 --time 10 --max-latency 0.2 --accuracy\n", + "```\n", + "During testing you can change some of the options to have faster test cycles but for final submission use the defaults." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using docker\n", + "\n", + "Instead of run_local.sh you can use run_and_time.sh, which takes the same options but runs the benchmark under docker instead of locally." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sending build context to Docker daemon 18.54MB\n", + "Step 1/15 : FROM ubuntu:16.04\n", + " ---> bd3d4369aebc\n", + "Step 2/15 : ENV PYTHON_VERSION=3.7\n", + " ---> Using cache\n", + " ---> e25f214201a2\n", + "Step 3/15 : ENV LANG C.UTF-8\n", + " ---> Using cache\n", + " ---> 12986ee696e1\n", + "Step 4/15 : ENV LC_ALL C.UTF-8\n", + " ---> Using cache\n", + " ---> 1460535b24e1\n", + "Step 5/15 : ENV PATH /opt/anaconda3/bin:$PATH\n", + " ---> Using cache\n", + " ---> f4c922578fdf\n", + "Step 6/15 : WORKDIR /root\n", + " ---> Using cache\n", + " ---> fb0ec9a436a5\n", + "Step 7/15 : ENV HOME /root\n", + " ---> Using cache\n", + " ---> edeb7c15ebfb\n", + "Step 8/15 : RUN apt-get update\n", + " ---> Using cache\n", + " ---> 42da1a4fa3fd\n", + "Step 9/15 : RUN apt-get install -y --no-install-recommends git build-essential software-properties-common ca-certificates wget curl htop zip unzip\n", + " ---> Using cache\n", + " ---> a1de66a3c7bd\n", + "Step 10/15 : RUN cd /opt && wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O miniconda.sh && /bin/bash ./miniconda.sh -b -p /opt/anaconda3 && rm miniconda.sh && /opt/anaconda3/bin/conda clean -tipsy && ln -s /opt/anaconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && echo \". 
/opt/anaconda3/etc/profile.d/conda.sh\" >> ~/.bashrc && echo \"conda activate base\" >> ~/.bashrc && conda config --set always_yes yes --set changeps1 no\n", + " ---> Using cache\n", + " ---> b3a1fa068421\n", + "Step 11/15 : RUN conda install pytorch-cpu torchvision-cpu -c pytorch\n", + " ---> Using cache\n", + " ---> 0f7c294fe4c8\n", + "Step 12/15 : RUN pip install future pillow onnx opencv-python-headless tensorflow onnxruntime\n", + " ---> Using cache\n", + " ---> 160977b84ece\n", + "Step 13/15 : RUN pip install Cython && pip install pycocotools\n", + " ---> Using cache\n", + " ---> ffc479fc7d11\n", + "Step 14/15 : RUN cd /tmp && git clone https://github.com/mlperf/inference && cd inference/loadgen && pip install pybind11 && CFLAGS=\"-std=c++14\" python setup.py install && rm -rf mlperf\n", + " ---> Using cache\n", + " ---> 20eb0ce678b0\n", + "Step 15/15 : ENTRYPOINT [\"/bin/bash\"]\n", + " ---> Using cache\n", + " ---> 9440a8884457\n", + "Successfully built 9440a8884457\n", + "Successfully tagged mlperf-infer-imgclassify-cpu:latest\n", + "Clearing caches.\n", + "3\n", + "STARTING RUN AT 2019-07-23 04:09:29 PM\n", + "INFO:main:Namespace(accuracy=False, backend='onnxruntime', cache=0, count=None, data_format=None, dataset='imagenet_mobilenet', dataset_list=None, dataset_path='/home/gs/inference/v0.5/classification_and_detection/fake_imagenet', inputs=None, max_batchsize=32, max_latency=[0.2], model='/home/gs/inference/v0.5/classification_and_detection/mobilenet_v1_1.0_224.onnx', output='/output/results.json', outputs=['MobilenetV1/Predictions/Reshape_1:0'], profile='mobilenet-onnxruntime', qps=10, queries_multi=24576, queries_offline=20, queries_single=1024, scenario=[TestScenario.SingleStream], threads=8, time=10)\n", + "INFO:imagenet:loaded 8 images, cache=0, took=0.3sec\n", + "INFO:main:starting TestScenario.SingleStream\n", + "TestScenario.SingleStream qps=37.18, mean=0.0268, time=10.09, queries=375, tiles=50.0:0.0261,80.0:0.0262,90.0:0.0266,95.0:0.0271,99.0:0.0385,99.9:0.0823\n", + "ENDING RUN AT 2019-07-23 04:09:45 PM\n" + ] + } + ], + "source": [ + "!./run_and_time.sh onnxruntime mobilenet cpu " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Preparing for official submission\n", + "\n", + "TODO." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/retired_benchmarks/vision/classification_and_detection/README.md b/retired_benchmarks/vision/classification_and_detection/README.md new file mode 100755 index 000000000..45009870f --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/README.md @@ -0,0 +1,270 @@ +# MLPerf Inference Benchmarks for Image Classification and Object Detection Tasks + +This is the reference implementation for MLPerf Inference benchmarks. + +You can find a short tutorial on how to use this benchmark [here](https://github.com/mlperf/inference/blob/master/vision/classification_and_detection/GettingStarted.ipynb). 
+ +## Supported Models + +| model | framework | accuracy | dataset | model link | model source | precision | notes | +| ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | +| resnet50-v1.5 | tensorflow | 76.456% | imagenet2012 validation | [from zenodo](https://zenodo.org/record/2535873/files/resnet50_v1.pb) | [mlperf](https://github.com/mlperf/training/tree/master/image_classification), [tensorflow](https://github.com/tensorflow/models/tree/master/official/resnet) | fp32 | NHWC. More information on resnet50 v1.5 can be found [here](https://github.com/tensorflow/models/tree/master/official/resnet).|| +| resnet50-v1.5 | onnx | 76.456% | imagenet2012 validation | from zenodo: [opset-8](https://zenodo.org/record/2592612/files/resnet50_v1.onnx), [opset-11](https://zenodo.org/record/4735647/files/resnet50_v1.onnx) | [from zenodo](https://zenodo.org/record/2535873/files/resnet50_v1.pb) converted with [this script](https://github.com/mlcommons/inference/blob/master/vision/classification_and_detection/tools/convert-to-onnx.sh) | fp32 | NCHW, tested on pytorch and onnxruntime | +| resnet50-v1.5 | pytorch | 76.014% | imagenet2012 validation | [from zenodo](https://zenodo.org/record/4588417/files/resnet50-19c8e357.pth) | [from TorchVision](https://github.com/pytorch/vision/blob/v0.8.2/torchvision/models/resnet.py) | fp32 | NCHW | +| resnet50-v1.5 | pytorch | 75.790% | imagenet2012 validation | [from zenodo](https://zenodo.org/record/4589637/files/resnet50_INT8bit_quantized.pt) | Edgecortix [quantization script](tools/calibrate_torchvision_model.py) | A: int8, W: uint8 | NCHW | +| mobilenet-v1 (depreciated since mlperf-v0.7)| tensorflow | 71.676% | imagenet2012 validation | [from zenodo](https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224.tgz) | [from tensorflow](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz) | fp32 | NHWC | +| mobilenet-v1 quantized (depreciated since mlperf-v0.7)| tensorflow | 70.694% | imagenet2012 validation | [from zenodo](https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224_quant.tgz) | [from tensorflow](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz) | int8 | NHWC | +| mobilenet-v1 (depreciated since mlperf-v0.7)| tflite | 71.676% | imagenet2012 validation | [from zenodo](https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224.tgz) | [from tensorflow](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz) | fp32 | NHWC | +| mobilenet-v1 quantized (depreciated since mlperf-v0.7)| tflite | 70.762% | imagenet2012 validation | [from zenodo](https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224_quant.tgz) | [from tensorflow](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz) | int8 | NHWC | +| mobilenet-v1 (depreciated since mlperf-v0.7)| onnx | 71.676% | imagenet2012 validation | from zenodo: [opset-8](https://zenodo.org/record/3157894/files/mobilenet_v1_1.0_224.onnx), [opset-11](https://zenodo.org/record/4735651/files/mobilenet_v1_1.0_224.onnx) | [from tensorflow](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz) converted with [this script](https://github.com/mlcommons/inference/blob/master/vision/classification_and_detection/tools/convert-to-onnx.sh) | fp32 | NCHW, tested on pytorch and onnxruntime | +| mobilenet-v1 (depreciated since mlperf-v0.7)| onnx, pytorch | 70.9% | imagenet2012 validation | [from 
zenodo](https://zenodo.org/record/3353417/files/Quantized%20MobileNet.zip) | ??? | int8 | ??? | +| ssd-mobilenet 300x300 | tensorflow | mAP 0.23 | coco resized to 300x300 | [from tensorflow](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz) | [from tensorflow](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz) | fp32 | NHWC | +| ssd-mobilenet 300x300 quantized finetuned | tensorflow | mAP 0.23594 | coco resized to 300x300 | [from zenodo](https://zenodo.org/record/3252084/files/mobilenet_v1_ssd_8bit_finetuned.tar.gz) | Habana | int8 | ??? | +| ssd-mobilenet 300x300 symmetrically quantized finetuned | tensorflow | mAP 0.234 | coco resized to 300x300 | [from zenodo](https://zenodo.org/record/3401714/files/ssd_mobilenet_v1_quant_ft_no_zero_point_frozen_inference_graph.pb) | Habana | int8 | ??? | +| ssd-mobilenet 300x300 | pytorch | mAP 0.23 | coco resized to 300x300 | [from zenodo](https://zenodo.org/record/3239977/files/ssd_mobilenet_v1.pytorch) | [from tensorflow](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz) | fp32 | NHWC | +| ssd-mobilenet 300x300 | onnx | mAP 0.23 | coco resized to 300x300 | from zenodo [opset-8](https://zenodo.org/record/3163026/files/ssd_mobilenet_v1_coco_2018_01_28.onnx), [opset-11](https://zenodo.org/record/4735652/files/ssd_mobilenet_v1_coco_2018_01_28.onnx) | [from tensorflow](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz) converted using [this script](https://github.com/mlcommons/inference/blob/master/vision/classification_and_detection/tools/convert-to-onnx.sh) | fp32 | NHWC, tested on onnxruntime, some runtime warnings | +| ssd-mobilenet 300x300 | onnx, pytorch | mAP 0.23 | coco resized to 300x300 | [from zenodo](https://zenodo.org/record/3252084/files/mobilenet_v1_ssd_8bit_finetuned.tar.gz) | ??? | int8 | ??? | +| ssd-resnet34 1200x1200 | tensorflow | mAP 0.20 | coco resized to 1200x1200| [from zenodo](https://zenodo.org/record/3345892/files/tf_ssd_resnet34_22.1.zip?download=1) | [from mlperf](https://github.com/mlperf/inference/tree/master/others/cloud/single_stage_detector/tensorflow), [training model](https://github.com/lji72/inference/tree/tf_ssd_resent34_align_onnx/others/cloud/single_stage_detector/tensorflow) | fp32 | NCHW | +| ssd-resnet34 1200x1200 | pytorch | mAP 0.20 | coco resized to 1200x1200 | [from zenodo](https://zenodo.org/record/3236545/files/resnet34-ssd1200.pytorch) | [from mlperf](https://github.com/mlperf/inference/tree/master/others/cloud/single_stage_detector/pytorch) | fp32 | NCHW | +| ssd-resnet34 1200x1200 | onnx | mAP 0.20 | coco resized to 1200x1200 | from zenodo [opset-8](https://zenodo.org/record/3228411/files/resnet34-ssd1200.onnx) | [from mlperf](https://github.com/mlperf/inference/tree/master/others/cloud/single_stage_detector) converted using the these [instructions](https://github.com/BowenBao/inference/tree/master/cloud/single_stage_detector/pytorch#6-onnx) | fp32 | Converted from pytorch model. 
| +| ssd-resnet34 1200x1200 | onnx | mAP 0.20 | coco resized to 1200x1200 | from zenodo [opset-11](https://zenodo.org/record/4735664/files/ssd_resnet34_mAP_20.2.onnx) | [from zenodo](https://zenodo.org/record/3345892/files/tf_ssd_resnet34_22.1.zip) converted using [this script](https://github.com/mlcommons/inference/blob/master/vision/classification_and_detection/tools/convert-to-onnx.sh) | fp32 | Converted from the tensorflow model and uses the same interface as the tensorflow model. | + +## Disclaimer +This benchmark app is a reference implementation that is not meant to be the fastest implementation possible. +It is written in python, which might make it less suitable for lite models like mobilenet or for systems with a large number of cpus. +We are considering providing a c++ implementation with identical functionality in the near future. + +## Tools for preparing datasets and validating accuracy +The reference implementation includes all required pre-processing of datasets. +It also includes a ```--accuracy``` option to validate accuracy as required by mlperf. +If you are not using the reference implementation, a few scripts will help: +### Prepare the coco dataset +The tool is [here](../../tools/upscale_coco). +You can run it for ssd-mobilenet like: +``` +python upscale_coco.py --inputs /data/coco/ --outputs /data/coco-300 --size 300 300 --format png +``` +and for ssd-resnet34 like: +``` +python upscale_coco.py --inputs /data/coco/ --outputs /data/coco-1200 --size 1200 1200 --format png +``` +### Prepare the imagenet dataset +to come. + +### Validate accuracy for resnet50 and mobilenet benchmarks +The tool is [here](tools/accuracy-imagenet.py). You can run it like: +``` +python tools/accuracy-imagenet.py --mlperf-accuracy-file mlperf_log_accuracy.json --imagenet-val-file /data/imagenet2012/val_map.txt +``` + +### Validate accuracy for ssd-mobilenet and ssd-resnet34 benchmarks +The tool is [here](tools/accuracy-coco.py). You can run it like: +``` +python tools/accuracy-coco.py --mlperf-accuracy-file mlperf_log_accuracy.json --coco-dir /data/coco --use-inv-map +``` + +## Datasets +| dataset | download link | +| ---- | ---- | +| imagenet2012 (validation) | http://image-net.org/challenges/LSVRC/2012/ | +| coco (validation) | http://images.cocodataset.org/zips/val2017.zip | +| coco (annotations) | http://images.cocodataset.org/annotations/annotations_trainval2017.zip | + +### Using Collective Knowledge (CK) + +Alternatively, you can download the datasets using the [Collective Knowledge](http://cknowledge.org) +framework (CK) for collaborative and reproducible research. 
+ +First, install CK and pull its repositories containing dataset packages: +```bash +$ python -m pip install ck --user +$ ck version +V1.9.8.1 +$ ck pull repo:ck-env +``` + +#### ImageNet 2012 validation dataset +Download the original dataset and auxiliaries: +```bash +$ ck install package --tags=image-classification,dataset,imagenet,val,original,full +$ ck install package --tags=image-classification,dataset,imagenet,aux +``` +Copy the labels next to the images: +```bash +$ ck locate env --tags=image-classification,dataset,imagenet,val,original,full +/home/dvdt/CK-TOOLS/dataset-imagenet-ilsvrc2012-val +$ ck locate env --tags=image-classification,dataset,imagenet,aux +/home/dvdt/CK-TOOLS/dataset-imagenet-ilsvrc2012-aux +$ cp `ck locate env --tags=aux`/val.txt `ck locate env --tags=val`/val_map.txt +``` + +#### COCO 2017 validation dataset +```bash +$ ck install package --tags=object-detection,dataset,coco,2017,val,original +$ ck locate env --tags=object-detection,dataset,coco,2017,val,original +/home/dvdt/CK-TOOLS/dataset-coco-2017-val +``` + +## Prerequisites and Installation +We support [tensorflow+tflite](https://github.com/tensorflow/tensorflow), [onnxruntime](https://github.com/Microsoft/onnxruntime) and [pytorch](http://pytorch.org) backends with the same benchmark tool. +Support for other backends can be easily added. + +The following steps are **only** needed if you run the benchmark **without Docker**. + +Python 3.5, 3.6 or 3.7 is supported and we recommend using Anaconda (See [Dockerfile](Dockerfile.cpu) for a minimal Anaconda install). + +Install the desired backend. +For tensorflow: +``` +pip install tensorflow or pip install tensorflow-gpu +``` +For onnxruntime: +``` +pip install onnxruntime or pip install onnxruntime-gpu +``` + +Build and install the benchmark: +``` +cd ../../loadgen; CFLAGS="-std=c++14" python setup.py develop --user; cd ../vision/classification_and_detection + +python setup.py develop +``` + + +## Running the benchmark +### One time setup + +Download the model and dataset for the model you want to benchmark. + +Both the local and docker environments need 2 environment variables set: +``` +export MODEL_DIR=YourModelFileLocation +export DATA_DIR=YourImageNetLocation +``` + + +### Run local +``` +./run_local.sh backend model device + +backend is one of [tf|onnxruntime|pytorch|tflite] +model is one of [resnet50|mobilenet|ssd-mobilenet|ssd-resnet34] +device is one of [cpu|gpu] + + +For example: + +./run_local.sh tf resnet50 gpu +``` + +### Run as Docker container +``` +./run_and_time.sh backend model device + +backend is one of [tf|onnxruntime|pytorch|tflite] +model is one of [resnet50|mobilenet|ssd-mobilenet|ssd-resnet34] +device is one of [cpu|gpu] + +For example: + +./run_and_time.sh tf resnet50 gpu +``` +This will build and run the benchmark. + +### Examples for testing +During development, running the full benchmark is impractical. 
Some options to help: + +```--count``` limits the number of items in the dataset used for the accuracy pass + +```--time``` limits the time the benchmark runs + +```--accuracy``` enables the accuracy pass + +```--max-latency``` the latency used for Server mode + +So if you want to tune, for example, Server mode, try: +``` +./run_local.sh tf resnet50 gpu --count 100 --time 60 --scenario Server --qps 200 --max-latency 0.1 +or +./run_local.sh tf ssd-mobilenet gpu --count 100 --time 60 --scenario Server --qps 100 --max-latency 0.1 + +``` + +If you want to run with the accuracy pass, try: +``` +./run_local.sh tf ssd-mobilenet gpu --accuracy --time 60 --scenario Server --qps 100 --max-latency 0.2 +``` + + +### Usage +``` +usage: main.py [-h] + [--mlperf_conf ../../mlperf.conf] + [--user_conf user.conf] + [--dataset {imagenet,imagenet_mobilenet,coco,coco-300,coco-1200,coco-1200-onnx,coco-1200-pt,coco-1200-tf}] + --dataset-path DATASET_PATH [--dataset-list DATASET_LIST] + [--data-format {NCHW,NHWC}] + [--profile {defaults,resnet50-tf,resnet50-onnxruntime,mobilenet-tf,mobilenet-onnxruntime,ssd-mobilenet-tf,ssd-mobilenet-onnxruntime,ssd-resnet34-tf,ssd-resnet34-pytorch,ssd-resnet34-onnxruntime}] + [--scenario list of SingleStream,MultiStream,Server,Offline] + [--max-batchsize MAX_BATCHSIZE] + --model MODEL [--output OUTPUT] [--inputs INPUTS] + [--outputs OUTPUTS] [--backend BACKEND] [--threads THREADS] + [--time TIME] [--count COUNT] [--qps QPS] + [--max-latency MAX_LATENCY] [--cache CACHE] [--accuracy] +``` + +```--mlperf_conf``` +the mlperf config file to use for rules-compliant parameters, defaults to ../../mlperf.conf + +```--user_conf``` +the user config file to use for user LoadGen settings such as target QPS, defaults to user.conf + +```--dataset``` +use the specified dataset. Currently we only support ImageNet. + +```--dataset-path``` +path to the dataset. + +```--data-format {NCHW,NHWC}``` +data format of the model (default: the backend's preferred format). + +```--scenario {SingleStream,MultiStream,Server,Offline}``` +comma separated list of benchmark modes. + +```--profile {resnet50-tf,resnet50-onnxruntime,mobilenet-tf,mobilenet-onnxruntime,ssd-mobilenet-tf,ssd-mobilenet-onnxruntime,ssd-resnet34-tf,ssd-resnet34-onnxruntime}``` +this fills in default command line options with the ones specified in the profile. Command line options that follow may override those. + +```--model MODEL``` +the model file. + +```--inputs INPUTS``` +comma separated input name list in case the model format does not provide the input names. This is needed for tensorflow since the graph does not specify the inputs. + +```--outputs OUTPUTS``` +comma separated output name list in case the model format does not provide the output names. This is needed for tensorflow since the graph does not specify the outputs. + +```--output OUTPUT``` +location of the JSON output. + +```--backend BACKEND``` +which backend to use. Currently supported are tensorflow, onnxruntime, pytorch and tflite. + +```--threads THREADS``` +number of worker threads to use (default: the number of processors in the system). + +```--count COUNT``` +Number of images from the dataset to use (default: use all images in the dataset). + +```--qps QPS``` +Expected QPS. + +```--max-latency MAX_LATENCY``` +comma separated list of which latencies (in seconds) we try to reach in the 99th percentile (default: 0.01,0.05,0.100). + +```--max-batchsize MAX_BATCHSIZE``` +maximum batch size we send to the backend (default: 128). 
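As a rough sketch of how these options combine, the snippet below drives `python/main.py` directly from Python instead of going through `run_local.sh`. It only uses flags documented above; the model and dataset paths are placeholders to replace with your own locations, and the flag values are example settings for a quick test run rather than submission defaults.

```python
import subprocess

# Placeholder locations -- substitute your own model file and dataset directory.
MODEL = "/models/resnet50_v1.onnx"
DATA_DIR = "/data/imagenet2012"

cmd = [
    "python", "python/main.py",
    "--profile", "resnet50-onnxruntime",   # profile fills in backend/input/output defaults
    "--model", MODEL,
    "--dataset-path", DATA_DIR,
    "--scenario", "Offline",               # one of SingleStream,MultiStream,Server,Offline
    "--count", "100",                      # limit the number of images for a quick pass
    "--time", "60",                        # limit the run to roughly 60 seconds
    "--accuracy",                          # also produce an accuracy report
    "--output", "output/results.json",
]
# Assumes the current directory is classification_and_detection.
subprocess.run(cmd, check=True)
```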
+ + +## License + +[Apache License 2.0](LICENSE) diff --git a/retired_benchmarks/vision/classification_and_detection/VERSION_NUMBER b/retired_benchmarks/vision/classification_and_detection/VERSION_NUMBER new file mode 100644 index 000000000..6e8bf73aa --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/VERSION_NUMBER @@ -0,0 +1 @@ +0.1.0 diff --git a/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/README.md b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/README.md new file mode 100644 index 000000000..bc63302c9 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/README.md @@ -0,0 +1,421 @@ +[![compatibility](https://github.com/ctuning/ck-guide-images/blob/master/ck-compatible.svg)](https://github.com/ctuning/ck) +[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) + +# MLPerf Inference - Image Classification + +MLPerf Inference v0.5 uses MobileNets-v1-1.0-224 (called MobileNet in what follows) and ResNet50-v1.5 (called ResNet in what follows). + +# Table of contents + +1. [Installation](#installation) + - [Install prerequisites](#installation-debian) (Debian-specific) + - [Install CK workflows](#installation-workflows) (universal) +1. [Benchmarking](#benchmarking) + - [via TensorFlow Lite](tflite/README.md) + - [via TensorFlow (C++)](tf-cpp/README.md) + - [via TensorFlow (Python)](tf-py/README.md) + - [via ONNX](onnx/README.md) +1. [Understanding the anatomy of a benchmark](#anatomy) +1. [Inspecting experimental results](#results) + + +# Installation + +**NB:** If you would like to get a feel of CK workflows, you can skip +installation instructions and try [benchmarking](#benchmarking) +instructions on available Docker images: +- TensorFlow Lite: + - [Debian 9](https://github.com/ctuning/ck-mlperf/tree/master/docker/image-classification-tflite.debian-9) + - [CentOS 7](https://github.com/ctuning/ck-mlperf/tree/master/docker/image-classification-tflite.centos-7) + - [Ubuntu 16.04](https://github.com/ctuning/ck-mlperf/tree/master/docker/image-classification-tflite.ubuntu-16.04) + - [Ubuntu 18.04](https://github.com/ctuning/ck-mlperf/tree/master/docker/image-classification-tflite.ubuntu-18.04) + - [Ubuntu 18.04 - Dashboard](https://github.com/ctuning/ck-mlperf/tree/master/docker/image-classification-tflite.dashboard.ubuntu-18.04) +- TensorFlow (C++): + - [Debian 9](https://github.com/ctuning/ck-mlperf/tree/master/docker/image-classification-tf-cpp.debian-9) +- Arm NN: + - [Debian 9](https://github.com/ARM-software/armnn-mlperf/tree/master/docker/image-classification-armnn-tflite.debian-9) + +Even if you would like to run CK workflows natively (e.g. on an Arm-based +development board or Android phone), you may wish to have a quick look into the +latest Dockerfile's to check for latest updates e.g. system-specific +dependencies. + + +## Debian + +- Common tools and libraries. +- [Python](https://www.python.org/), [pip](https://pypi.org/project/pip/), [NumPy](https://numpy.org/), [Collective Knowledge](https://cknowledge.org) (CK). +- (Optional) [Android SDK](https://developer.android.com/studio/), [Android NDK](https://developer.android.com/ndk/). 
+ +### Install common tools and libraries +```bash +$ sudo apt install git wget libz-dev curl +$ sudo apt install gcc g++ autoconf autogen libtool +``` + +### Install Python 3 and the latest pip +```bash +$ sudo apt install python3 python3-pip +$ sudo python3 -m pip install --upgrade pip +``` + +**NB:** Care must be taken not to mix Python 3 and Python 2 packages. +If your system uses Python 2 by default, we recommend you prefix +all CK commands, for example, with `CK_PYTHON=python3` for CK to run under Python 3: +``` +$ python --version +Python 2.7.13 +$ ck python_version +2.7.13 (default, Sep 26 2018, 18:42:22) +[GCC 6.3.0 20170516] +$ CK_PYTHON=python3 ck python_version +3.5.3 (default, Sep 27 2018, 17:25:39) +[GCC 6.3.0 20170516] +``` +Similarly, if you use multiple Python 3 versions (e.g. 3.5 and 3.6), we recommend +you stick to one of them for consistency: +``` +$ CK_PYTHON=python3.5 ck python_version +3.5.2 (default, Nov 12 2018, 13:43:14) +[GCC 5.4.0 20160609] +$ CK_PYTHON=python3.6 ck python_version +3.6.7 (default, Oct 25 2018, 09:16:13) +[GCC 5.4.0 20160609] +``` + +### Install required Python 3 packages +Choose one of the following installation options: +1. system-wide via pip; +1. user-space via pip; +1. user-space via CK. + +With the first two options, packages get installed via pip and get registered +with CK later (typically, on the first run of a program). + +With the last option, packages also get installed via pip but get registered +with CK at the same time (so there is less chance of mixing things up). + +#### Option 1: system-wide installation via pip (under `/usr`) +```bash +$ sudo python3 -m pip install numpy ck +``` +#### Option 2: user-space installation via pip (under `$HOME`) +```bash +$ python3 -m pip install numpy ck --user +``` +#### Option 3: User-space installation via CK (under `$HOME` and `$CK_TOOLS`) +Install CK via pip (or [from GitHub](https://github.com/ctuning/ck#installation)): +```bash +$ python3 -m pip install ck --user +$ ck version +V1.10.3 +``` + +Install and register Python packages with CK: +```bash +$ ck pull repo:ck-env +$ ck detect soft:compiler.python --full_path=`which python3` +$ ck install package --tags=lib,python-package,numpy +``` + +If the above dependencies have been installed on a clean system, you should be +able to inspect the registered CK environments e.g. as follows: +``` +$ ck show env --tags=python-package +Env UID: Target OS: Bits: Name: Version: Tags: + +4e82bab01c8ee3b7 linux-64 64 Python NumPy library 1.16.2 64bits,host-os-linux-64,lib,needs-python,needs-python-3.5.2,numpy,python-package,target-os-linux-64,v1,v1.16,v1.16.2,vmaster + +$ ck cat env --tags=python-package | grep PYTHONPATH +export PYTHONPATH=/home/anton/CK_TOOLS/lib-python-numpy-compiler.python-3.5.2-linux-64/build:${PYTHONPATH} +``` + +### [Optional] Install Android SDK and NDK + +You can optionally target Android API 23 (v6.0 "Marshmallow") devices using the +`--target_os=android23-arm64` flag +(or [similar](https://source.android.com/setup/start/build-numbers)), when using +the TensorFlow Lite benchmark (recommended) and TensorFlow (C++) benchmark (not recommended). + +On Debian Linux, you can install the [Android SDK](https://developer.android.com/studio/) and the [Android NDK](https://developer.android.com/ndk/) as follows: +``` +$ sudo apt install android-sdk +$ adb version +Android Debug Bridge version 1.0.36 +Revision 1:7.0.0+r33-2 +$ sudo apt install google-android-ndk-installer +``` +**NB:** On Ubuntu 18.04, NDK r13b gets installed. 
On Ubuntu 16.04, download [NDK r18b](https://dl.google.com/android/repository/android-ndk-r18b-linux-x86_64.zip) and extract it into e.g. `/usr/local`. NDK r18c only supports LLVM, which currently requires a CK quirk to work properly (removing a dependency on `soft:compiler.gcc.android.ndk` from `soft:compiler.llvm.android.ndk`). + + +## Install CK workflows for MLPerf + +### Pull CK repositories +```bash +$ ck pull repo:ck-mlperf +``` +**NB:** Transitive dependencies include [repo:ck-tensorflow](https://github.com/ctuning/ck-tensorflow). + +### Install a small dataset (500 images) +```bash +$ ck install package:imagenet-2012-val-min +``` +**NB:** ImageNet dataset descriptions are in [repo:ck-env](https://github.com/ctuning/ck-env). + +### Install the full dataset (50,000 images) +```bash +$ ck install package:imagenet-2012-val +``` + +**NB:** If you already have the ImageNet validation dataset downloaded in a directory e.g. `$HOME/ilsvrc2012-val/`, you can simply detect it as follows: +```bash +$ ck detect soft:dataset.imagenet.val --full_path=$HOME/ilsvrc2012-val/ILSVRC2012_val_00000001.JPEG +``` + +### Preprocess datasets + +ImageNet can be preprocessed in many different ways, +which can significantly affect the resulting accuracy. +We currently support 3 different preprocessing methods: +``` +$ ck install package --tags=dataset,imagenet,preprocessed + +More than one package or version found: + + 0) dataset-imagenet-preprocessed-using-tensorflow (fac1d0d5f4e69a85) + 1) dataset-imagenet-preprocessed-using-pillow (a6a4613ba6dfd570) + 2) dataset-imagenet-preprocessed-using-opencv (4932bbdd2ac7a17b) + +Please select the package to install [ hit return for "0" ]: +``` + +Preprocessing using OpenCV (option 2) is the current official method. +You can perform it directly by adding the `using-opencv` tag as follows: +``` +$ ck install package --tags=dataset,imagenet,preprocessed,using-opencv --ask +``` + +You can locate the preprocessed files on disk using the same tags as follows: +``` +$ ck cat env --tags=dataset,imagenet,preprocessed,using-opencv | grep CK_ENV_DATASET_IMAGENET_PREPROCESSED_DIR +export CK_ENV_DATASET_IMAGENET_PREPROCESSED_DIR=/datasets/dataset-imagenet-preprocessed-using-opencv +``` + +CK installs all the dependencies automatically. (More information on recommended choices for dependencies can be provided on demand.) + +#### Summary of preprocessing methods + +The table below summarizes the available methods. + +| Model | Pillow | OpenCV universal | OpenCV for MobileNet | OpenCV for ResNet | TensorFlow | +|-|-|-|-|-|-| +| Matches official? | No | No | Yes | Yes | No | +| Additional tags | `using-pillow` | `using-opencv,universal` | `using-opencv,for-mobilenet` | `using-opencv,for-resnet` | `using-tensorflow` | +| Supported models | ResNet, MobileNet | ResNet, MobileNet | MobileNet only | ResNet only | ResNet only | +| Supported platforms | x86, arm | x86 | x86 | x86 | x86 (prebuilt TF) | +| Data format | rgb8 (int8) | rgb8 (int8) | rgbf32 (float32) | rgbf32 (float32) | rgbf32 (float32) | +| Data size | 7.1G | 7.1G | 29G | 29G | 29G | + +The official code [preprocesses](https://github.com/mlperf/inference/blob/master/v0.5/classification_and_detection/python/dataset.py) images using OpenCV. 
ResNet and MobileNet require different preprocessing after resizing: ResNet requires [means to be subtracted](https://github.com/mlperf/inference/blob/master/v0.5/classification_and_detection/python/dataset.py#L178); MobileNet requires [normalization to the (-1, 1) range](https://github.com/mlperf/inference/blob/master/v0.5/classification_and_detection/python/dataset.py#L195). In addition, the official ResNet preprocessing uses [area interpolation](https://github.com/mlperf/inference/blob/master/v0.5/classification_and_detection/python/dataset.py#L172), instead of the default [bilinear interpolation](https://github.com/mlperf/inference/blob/master/v0.5/classification_and_detection/python/dataset.py#L154). The same behaviour can be reproduced via CK. Preprocessed images for both MobileNet and ResNet, however, require 58G on disk, as pixels are stored as 32-bit floats. + +An alternative, dubbed OpenCV (universal), uses bilinear interpolation and stores pixels as 8-bit integers. At load time, however, minor additional processing may be required depending on the model (e.g. see sample [code for normalization](https://github.com/ctuning/ck-tensorflow/blob/master/program/image-classification-tflite/benchmark.h#L463) and [code for subtracting means](https://github.com/ctuning/ck-tensorflow/blob/master/program/image-classification-tflite/benchmark.h#L469)). + + +#### Accuracy on the ImageNet 2012 validation set + +The table below shows the accuracy on the ImageNet 2012 validation set +(50,000 images) measured [via TensorFlow (C++)](tf-cpp/README.md). + +| Model | Metric | Pillow | OpenCV universal | OpenCV for MobileNet | OpenCV for ResNet | TensorFlow | +|-|-|-|-|-|-|-| +| ResNet | Top1 | 0.76170 | 0.76422 | N/A | 0.76456 | 0.76522 | +| | Top5 | 0.92866 | 0.93074 | N/A | 0.93016 | 0.93066 | +| MobileNet non-quantized | Top1 | 0.71226 | 0.71676 | 0.71676 | N/A | N/A | +| | Top5 | 0.89834 | 0.90118 | 0.90118 | N/A | N/A | +| MobileNet quantized | Top1 | 0.70348 | 0.70700 | 0.70694 | N/A | N/A | +| | Top5 | 0.89376 | 0.89594 | 0.89594 | N/A | N/A | + +Considering Top1, the universal OpenCV method is slightly less accurate for ResNet, but slightly more accurate for MobileNet quantized than the official code. The TensorFlow method is most accurate for ResNet, but is not suitable for MobileNet. The Pillow method is least accurate, but can be used on Arm platforms. The difference between the universal OpenCV and the TensorFlow methods on ResNet is exactly 0.1% or 50 images. + +#### Detect datasets preprocessed on a different machine + +**TODO** + + + +## Benchmarking + +You can benchmark MobileNet using one of the available options: +- [via TensorFlow Lite](tflite/README.md) +- [via TensorFlow (C++)](tf-cpp/README.md) +- [via TensorFlow (Python)](tf-py/README.md) +- [via ONNX](onnx/README.md) + +Please come back here if you would like to learn about [the anatomy of a benchmark](#anatomy), or +how to inspect and visualize [experimental results](#results). + + +## The anatomy of a benchmark + +While the componentized nature of CK workflows streamlines +[installation](#installation) and [benchmarking](#benchmarking), it also makes +it less obvious what the components are and where they are stored. This section +describes the anatomy of a benchmark in terms of its components. We use the +[TFLite MobileNet implementation](tflite/README.md) as a running example. 
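+
+The queries below assume that a matching model has already been installed, e.g. the quantized MobileNet for TFLite (a sketch; see the [TFLite instructions](tflite/README.md) for the full set of model options):
+```bash
+$ ck install package --tags=model,tflite,mlperf,mobilenet,quantized
+```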
+ +### Model + +To search for the CK entry of an installed model, use `ck search env` with the same tags you used to install it e.g.: +``` +$ ck search env --tags=model,tflite,mlperf,mobilenet,quantized +local:env:3f0ca5c4d25b4ea3 +``` + +To view more information about the CK entry, use `ck show env` e.g.: +``` +$ ck show env --tags=model,tflite,mlperf,mobilenet,quantized +Env UID: Target OS: Bits: Name: Version: Tags: + +3f0ca5c4d25b4ea3 linux-64 64 TensorFlow model and weights (mobilenet-v1-1.0-224-quant-2018_08_02) 1_1.0_224_quant_2018_08_02 2018_08_02,64bits,downloaded,host-os-linux-64,mlperf,mobilenet,mobilenet-v1,mobilenet-v1-1.0-224,model,nhwc,python,quantised,quantized,target-os-linux-64,tensorflowmodel,tf,tflite,v1,v1.1,v1.1.0,v1.1.0.224,v1.1.0.224.0,v1.1.0.224.0.2018,v1.1.0.224.0.2018.8,v1.1.0.224.0.2018.8.2,weights +``` + +To view the environment variables set up by the CK entry, use `ck cat env` e.g.: +``` +$ ck cat `ck search env --tags=model,tflite,mlperf,mobilenet,quantized` +#! /bin/bash +# +# --------------------[ TensorFlow model and weights (mobilenet-v1-1.0-224-quant-2018_08_02) ver. 1_1.0_224_quant_2018_08_02, /home/anton/CK_REPOS/local/env/3f0ca5c4d25b4ea3/env.sh ]-------------------- +# Tags: 2018_08_02,64bits,downloaded,host-os-linux-64,mlperf,mobilenet,mobilenet-v1,mobilenet-v1-1.0-224,model,nhwc,python,quantised,quantized,target-os-linux-64,tensorflowmodel,tf,tflite,v1,v1.1,v1.1.0,v1.1.0.224,v1.1.0.224.0,v1.1.0.224.0.2018,v1.1.0.224.0.2018.8,v1.1.0.224.0.2018.8.2,weights +# +# CK generated script + +if [ "$1" != "1" ]; then if [ "$CK_ENV_TENSORFLOW_MODEL_SET" == "1" ]; then return; fi; fi + +# Soft UOA = model.tensorflow.py (439b9f1757f27091) (tensorflowmodel,model,weights,python,tf,tflite,nhwc,mobilenet,mobilenet-v1,mobilenet-v1-1.0-224,2018_08_02,quantized,quantised,mlperf,downloaded,host-os-linux-64,target-os-linux-64,64bits,v1,v1.1,v1.1.0,v1.1.0.224,v1.1.0.224.0,v1.1.0.224.0.2018,v1.1.0.224.0.2018.8,v1.1.0.224.0.2018.8.2) +# Host OS UOA = linux-64 (4258b5fe54828a50) +# Target OS UOA = linux-64 (4258b5fe54828a50) +# Target OS bits = 64 +# Tool version = 1_1.0_224_quant_2018_08_02 +# Tool split version = [1, 1, 0, 224, 0, 2018, 8, 2] + +export CK_ENV_TENSORFLOW_MODEL_IMAGE_HEIGHT=224 +export CK_ENV_TENSORFLOW_MODEL_IMAGE_WIDTH=224 +export CK_ENV_TENSORFLOW_MODEL_INPUT_LAYER_NAME=input +export CK_ENV_TENSORFLOW_MODEL_MOBILENET_MULTIPLIER=1.0 +export CK_ENV_TENSORFLOW_MODEL_MOBILENET_RESOLUTION=224 +export CK_ENV_TENSORFLOW_MODEL_MOBILENET_VERSION=1 +export CK_ENV_TENSORFLOW_MODEL_MODULE=/home/anton/CK_TOOLS/model-tf-mlperf-mobilenet-quantized-downloaded/mobilenet-model.py +export CK_ENV_TENSORFLOW_MODEL_NORMALIZE_DATA=YES +export CK_ENV_TENSORFLOW_MODEL_OUTPUT_LAYER_NAME=MobilenetV1/Predictions/Reshape_1 +export CK_ENV_TENSORFLOW_MODEL_ROOT=/home/anton/CK_TOOLS/model-tf-mlperf-mobilenet-quantized-downloaded +export CK_ENV_TENSORFLOW_MODEL_TFLITE_FILENAME=mobilenet_v1_1.0_224_quant.tflite +export CK_ENV_TENSORFLOW_MODEL_TFLITE_FILEPATH=/home/anton/CK_TOOLS/model-tf-mlperf-mobilenet-quantized-downloaded/mobilenet_v1_1.0_224_quant.tflite +export CK_ENV_TENSORFLOW_MODEL_TF_FROZEN_FILENAME=mobilenet_v1_1.0_224_quant_frozen.pb +export CK_ENV_TENSORFLOW_MODEL_TF_FROZEN_FILEPATH=/home/anton/CK_TOOLS/model-tf-mlperf-mobilenet-quantized-downloaded/mobilenet_v1_1.0_224_quant_frozen.pb +export CK_ENV_TENSORFLOW_MODEL_WEIGHTS=/home/anton/CK_TOOLS/model-tf-mlperf-mobilenet-quantized-downloaded/mobilenet_v1_1.0_224_quant.ckpt +export 
CK_ENV_TENSORFLOW_MODEL_WEIGHTS_ARE_CHECKPOINTS=YES +export CK_MODEL_DATA_LAYOUT=NHWC + +export CK_ENV_TENSORFLOW_MODEL_SET=1 +``` + +To inspect the model's files on disk, use `ck locate env` e.g.: +``` +$ ck locate env --tags=model,tflite,mlperf,mobilenet,quantized +/home/anton/CK_TOOLS/model-tf-mlperf-mobilenet-quantized-downloaded +$ ls -la `ck locate env --tags=model,tflite,mlperf,mobilenet,quantized` +total 43524 +drwxr-xr-x 2 anton dvdt 4096 Mar 25 12:31 . +drwxrwxr-x 18 anton dvdt 4096 Mar 25 12:32 .. +-rw-rw-r-- 1 anton dvdt 2240 Mar 25 12:31 ck-install.json +-rw-rw-r-- 1 anton dvdt 3477 Mar 25 12:31 mobilenet-model.py +-rw-rw-r-- 1 anton dvdt 20309 Mar 25 12:31 mobilenet_v1.py +-rw-r--r-- 1 anton dvdt 17020468 Aug 3 2018 mobilenet_v1_1.0_224_quant.ckpt.data-00000-of-00001 +-rw-r--r-- 1 anton dvdt 14644 Aug 3 2018 mobilenet_v1_1.0_224_quant.ckpt.index +-rw-r--r-- 1 anton dvdt 5143394 Aug 3 2018 mobilenet_v1_1.0_224_quant.ckpt.meta +-rw-r--r-- 1 anton dvdt 4276352 Aug 3 2018 mobilenet_v1_1.0_224_quant.tflite +-rw-r--r-- 1 anton dvdt 885850 Aug 3 2018 mobilenet_v1_1.0_224_quant_eval.pbtxt +-rw-r--r-- 1 anton dvdt 17173742 Aug 3 2018 mobilenet_v1_1.0_224_quant_frozen.pb +-rw-r--r-- 1 anton dvdt 89 Aug 3 2018 mobilenet_v1_1.0_224_quant_info.txt +``` + +**NB:** The TFLite weights are in the `mobilenet_v1_1.0_224*.tflite` file. Only +the TFLite weights are different between the `2018_02_22` and `2018_08_02` +MobileNets-v1 packages. We have adopted the latter for MLPerf Inference v0.5. + + + +## Inspecting and visualizing experimental results + +### Inspecting recorded experimental results + +If you run the same command several times selecting different models (quantized +or non-quantized) or datasets (500 images or 50,000 images), CK will create +several _experimental points_ in the same repository e.g.: +```bash +$ ck find local:experiment:mlperf-mobilenet-tf-cpp-accuracy +/home/anton/CK_REPOS/local/experiment/mlperf-mobilenet-tf-cpp-accuracy +$ ck list_points local:experiment:mlperf-mobilenet-tf-cpp-accuracy +78dae6354e471199 +918c80bc5d4906b0 +``` +You can then retrieve various run parameters from such experimental points. + +#### Accuracy +You can quickly inspect the accuracy recorded for a particular point as follows: +```bash +$ grep \"run\": -A2 /home/anton/CK_REPOS/local/experiment/mlperf-mobilenet-tf-cpp-accuracy/ckp-918c80bc5d4906b0.0001.json + "run": { + "accuracy_top1": 0.718, + "accuracy_top5": 0.9, +$ grep \"run\": -A2 /home/anton/CK_REPOS/local/experiment/mlperf-mobilenet-tf-cpp-accuracy/ckp-78dae6354e471199.0001.json + "run": { + "accuracy_top1": 0.704, + "accuracy_top5": 0.898, +``` + +#### Model +You can quickly inspect the model used for a particular point as follows: +```bash +$ grep RUN_OPT_GRAPH_FILE /home/anton/CK_REPOS/local/experiment/mlperf-mobilenet-tf-cpp-accuracy/ckp-918c80bc5d4906b0.0001.json + "RUN_OPT_GRAPH_FILE": "/home/anton/CK_TOOLS/model-tf-mlperf-mobilenet-downloaded/mobilenet_v1_1.0_224_frozen.pb", +$ grep RUN_OPT_GRAPH_FILE /home/anton/CK_REPOS/local/experiment/mlperf-mobilenet-tf-cpp-accuracy/ckp-78dae6354e471199.0001.json + "RUN_OPT_GRAPH_FILE": "/home/anton/CK_TOOLS/model-tf-mlperf-mobilenet-quantized-downloaded/mobilenet_v1_1.0_224_quant_frozen.pb", +``` +As expected, the lower accuracy comes from the quantized model. + +#### Dataset +Unfortunately, the dataset path is recorded only to `pipeline.json`. 
+This file gets overwritten on each run of `ck benchmark`, so only +the dataset used in the latest command can be retrieved: +```bash +$ grep \"CK_ENV_DATASET_IMAGENET_VAL\": /home/anton/CK_REPOS/local/experiment/mlperf-mobilenet-tf-cpp-accuracy/pipeline.json + "CK_ENV_DATASET_IMAGENET_VAL": "/home/anton/CK_TOOLS/dataset-imagenet-ilsvrc2012-val-min" +``` + +#### Batch count +You can, however, check the batch count e.g.: +```bash +$ grep CK_BATCH_COUNT /home/anton/CK_REPOS/local/experiment/mlperf-mobilenet-tf-cpp-accuracy/ckp-78dae6354e471199.0001.json + "CK_BATCH_COUNT": "500", +``` + +#### Image cropping +By default, input images preprocessed for a client program [get cropped](https://github.com/ctuning/ck-tensorflow/tree/master/program/image-classification-tf-cpp#ck_crop_percent) by 87.5%: +```bash +$ grep CK_CROP_PERCENT /home/anton/CK_REPOS/local/experiment/mlperf-mobilenet-tf-cpp-accuracy/ckp-78dae6354e471199.0001.json + "CK_CROP_PERCENT": 87.5, +``` + +This can be changed by passing e.g. `--env.CK_CROP_PERCENT=100` to `ck +benchmark` (but see [here](https://github.com/mlperf/inference/tree/21efaf57d55ccd78e77b87c668bb09d47564bb6a/v0.5/classification_and_detection/optional_harness_ck/classification/tf-cpp#reference-accuracy) how this can make results worse). + + +### Visualizing experimental results + +If you have collected some experimental results on your machine (or transferred to it results collected from elsewhere), you can visualize them as follows: +``` +$ ck display dashboard --scenario=mlperf.mobilenets +``` + +You can also see a demo CK [MobileNets](http://cknowledge.org/dashboard/mlperf.mobilenets) dashboard online. diff --git a/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/onnx/README.md b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/onnx/README.md new file mode 100644 index 000000000..bfce26e75 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/onnx/README.md @@ -0,0 +1,239 @@ +# Image Classification via ONNX + +1. [Installation instructions](#installation) +2. [Benchmarking instructions](#benchmarking) +3. [Reference accuracy](#accuracy) +4. [Further information](#further-info) + + + +## Installation instructions + +Please follow the [common installation instructions](../README.md#installation) first. + +### Install ONNX + +Install the ONNX library and runtime: +``` +$ ck install package --tags=lib,python-package,onnx +$ ck install package --tags=lib,python-package,onnxruntime +``` + +### Install the MobileNet model for ONNX + +To select interactively from one of the non-quantized and quantized MobileNets-v1-1.0-224 models: +``` +$ ck install package --tags=model,onnx,mlperf,mobilenet + +More than one package or version found: + + 0) model-onnx-mlperf-mobilenet Version 1_1.0_224_2018_08_02 (b47f4980eefabffa) + 1) model-onnx-convert-from-tf (22b1d864174bf743), Variations: mobilenet + +Please select the package to install [ hit return for "0" ]: +``` +Option 1 downloads the TF model and converts it to ONNX. Option 0 uses a pre-converted ONNX model. 
+ +We recommend [converting models on-the-fly](https://github.com/ctuning/ck-mlperf/blob/master/package/model-onnx-convert-from-tf/README.md), as you can additionally control the data layout as follows: +- NHWC: +``` +$ ck install package --tags=onnx,model,mobilenet,converted,nhwc +``` +- NCHW +``` +$ ck install package --tags=onnx,model,mobilenet,converted,nchw +``` +Note that without the layout tags (`nhwc` or `nchw`), the layout is selected nondeterministically. + +#### Bonus + +##### Install the ResNet model + +You can similarly convert ResNet as follows: +- NHWC +``` +$ ck install package --tags=onnx,model,resnet,converted,nhwc +``` +- NCHW +``` +$ ck install package --tags=onnx,model,resnet,converted,nchw +``` + +You can benchmark ResNet exactly in the same way as MobileNet. +Just replace `mobilenet` with `resnet` in the [benchmarking instructions](#benchmarking) below. + + +### Run the ONNX Image Classification client + +#### MobileNet, NHWC +``` +$ ck run program:image-classification-onnx-py +... +*** Dependency 3 = weights (ONNX model): + +More than one environment found for "ONNX model" with tags="model,image-classification,onnx" and setup={"host_os_uoa": "linux-64", "target_os_uoa": "linux-64", "target_os_bits": "64"}: + + 0) ONNX-from-TF model (MLPerf MobileNet) - v1_1.0_224_2018_08_02 (64bits,converted,converted-from-tf,host-os-linux-64,image-classification,mlperf,mobilenet,model,nhwc,onnx,target-os-linux-64,v1,v1.1,v1.1 +.0,v1.1.0.224,v1.1.0.224.2018,v1.1.0.224.2018.8,v1.1.0.224.2018.8.2 (f18d48538fbfbd46)) + - Depends on "python" (env UOA=7c8bbf2343208d88, tags="compiler,python", version=3.6.7) + - Depends on "lib-python-numpy" (env UOA=fe9d0436cbfd34c8, tags="lib,python-package,numpy", version=1.16.2) + - Depends on "lib-tensorflow" (env UOA=9c34f3f9b9b8dfd4, tags="lib,tensorflow,vprebuilt", version=1.13.1) + - Depends on "lib-python-onnx" (env UOA=c9a3c5ad5de9adcb, tags="lib,python-package,onnx", version=1.4.1) + - Depends on "lib-python-tf2onnx" (env UOA=44dd6b520ae81482, tags="lib,python-package,tf2onnx", version=1.4.1) + - Depends on "model-source" (env UOA=e5cf6f254447a629, tags="model,image-classification,tf", version=1_1.0_224_2018_08_02) + + 1) ONNX-from-TF model (MLPerf MobileNet) - v1_1.0_224_2018_08_02 (64bits,converted,converted-from-tf,host-os-linux-64,image-classification,mlperf,mobilenet,model,nchw,onnx,target-os-linux-64,v1,v1.1,v1.1 +.0,v1.1.0.224,v1.1.0.224.2018,v1.1.0.224.2018.8,v1.1.0.224.2018.8.2 (2e1b5534351b7e33)) + - Depends on "python" (env UOA=7c8bbf2343208d88, tags="compiler,python", version=3.6.7) + - Depends on "lib-python-numpy" (env UOA=fe9d0436cbfd34c8, tags="lib,python-package,numpy", version=1.16.2) + - Depends on "lib-tensorflow" (env UOA=9c34f3f9b9b8dfd4, tags="lib,tensorflow,vprebuilt", version=1.13.1) + - Depends on "lib-python-onnx" (env UOA=c9a3c5ad5de9adcb, tags="lib,python-package,onnx", version=1.4.1) + - Depends on "lib-python-tf2onnx" (env UOA=44dd6b520ae81482, tags="lib,python-package,tf2onnx", version=1.4.1) + - Depends on "model-source" (env UOA=e5cf6f254447a629, tags="model,image-classification,tf", version=1_1.0_224_2018_08_02) + + +Select one of the options for "ONNX model" with tags="model,image-classification,onnx" and setup={"host_os_uoa": "linux-64", "target_os_uoa": "linux-64", "target_os_bits": "64"} [ hit return for "0" ]: 0 + + Resolved. CK environment UID = f18d48538fbfbd46 (version 1_1.0_224_2018_08_02) +... 
+-------------------------------- +Process results in predictions +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.84 - (65) n01751748 sea snake +0.08 - (58) n01737021 water snake +0.04 - (34) n01665541 leatherback turtle, leatherback, leather... +0.01 - (54) n01729322 hognose snake, puff adder, sand viper +0.01 - (57) n01735189 garter snake, grass snake +--------------------------------------- + +Summary: +------------------------------- +Graph loaded in 0.018571s +All images loaded in 0.001246s +All images classified in 0.188061s +Average classification time: 0.188061s +Accuracy top 1: 1.0 (1 of 1) +Accuracy top 5: 1.0 (1 of 1) +-------------------------------- +``` + +#### MobileNet, NCHW +``` +$ ck run program:image-classification-onnx-py +... +*** Dependency 3 = weights (ONNX model): + +More than one environment found for "ONNX model" with tags="model,image-classification,onnx" and setup={"host_os_uoa": "linux-64", "target_os_uoa": "linux-64", "target_os_bits": "64"}: + + 0) ONNX-from-TF model (MLPerf MobileNet) - v1_1.0_224_2018_08_02 (64bits,converted,converted-from-tf,host-os-linux-64,image-classification,mlperf,mobilenet,model,nhwc,onnx,target-os-linux-64,v1,v1.1,v1.1 +.0,v1.1.0.224,v1.1.0.224.2018,v1.1.0.224.2018.8,v1.1.0.224.2018.8.2 (f18d48538fbfbd46)) + - Depends on "python" (env UOA=7c8bbf2343208d88, tags="compiler,python", version=3.6.7) + - Depends on "lib-python-numpy" (env UOA=fe9d0436cbfd34c8, tags="lib,python-package,numpy", version=1.16.2) + - Depends on "lib-tensorflow" (env UOA=9c34f3f9b9b8dfd4, tags="lib,tensorflow,vprebuilt", version=1.13.1) + - Depends on "lib-python-onnx" (env UOA=c9a3c5ad5de9adcb, tags="lib,python-package,onnx", version=1.4.1) + - Depends on "lib-python-tf2onnx" (env UOA=44dd6b520ae81482, tags="lib,python-package,tf2onnx", version=1.4.1) + - Depends on "model-source" (env UOA=e5cf6f254447a629, tags="model,image-classification,tf", version=1_1.0_224_2018_08_02) + + 1) ONNX-from-TF model (MLPerf MobileNet) - v1_1.0_224_2018_08_02 (64bits,converted,converted-from-tf,host-os-linux-64,image-classification,mlperf,mobilenet,model,nchw,onnx,target-os-linux-64,v1,v1.1,v1.1 +.0,v1.1.0.224,v1.1.0.224.2018,v1.1.0.224.2018.8,v1.1.0.224.2018.8.2 (2e1b5534351b7e33)) + - Depends on "python" (env UOA=7c8bbf2343208d88, tags="compiler,python", version=3.6.7) + - Depends on "lib-python-numpy" (env UOA=fe9d0436cbfd34c8, tags="lib,python-package,numpy", version=1.16.2) + - Depends on "lib-tensorflow" (env UOA=9c34f3f9b9b8dfd4, tags="lib,tensorflow,vprebuilt", version=1.13.1) + - Depends on "lib-python-onnx" (env UOA=c9a3c5ad5de9adcb, tags="lib,python-package,onnx", version=1.4.1) + - Depends on "lib-python-tf2onnx" (env UOA=44dd6b520ae81482, tags="lib,python-package,tf2onnx", version=1.4.1) + - Depends on "model-source" (env UOA=e5cf6f254447a629, tags="model,image-classification,tf", version=1_1.0_224_2018_08_02) + + +Select one of the options for "ONNX model" with tags="model,image-classification,onnx" and setup={"host_os_uoa": "linux-64", "target_os_uoa": "linux-64", "target_os_bits": "64"} [ hit return for "0" ]: 1 + + Resolved. CK environment UID = 2e1b5534351b7e33 (version 1_1.0_224_2018_08_02) +... +-------------------------------- +Process results in predictions +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.84 - (65) n01751748 sea snake +0.08 - (58) n01737021 water snake +0.04 - (34) n01665541 leatherback turtle, leatherback, leather... 
+0.01 - (54) n01729322 hognose snake, puff adder, sand viper +0.01 - (57) n01735189 garter snake, grass snake +--------------------------------------- + +Summary: +------------------------------- +Graph loaded in 0.018411s +All images loaded in 0.001247s +All images classified in 0.189969s +Average classification time: 0.189969s +Accuracy top 1: 1.0 (1 of 1) +Accuracy top 5: 1.0 (1 of 1) +-------------------------------- +``` + + +## Benchmarking instructions + +### Benchmark the performance +``` +$ ck benchmark program:image-classification-onnx-py --cmd_key=preprocessed \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 \ +--record --record_repo=local --record_uoa=mlperf-image-classification-mobilenet-onnx-py-performance \ +--tags=mlperf,image-classification,mobilenet,onnx-py,performance \ +--skip_print_timers --skip_stat_analysis --process_multi_keys +``` + +**NB:** When using the batch count of **N**, the program classifies **N** images, but +the slow first run is not taken into account when computing the average +classification time e.g.: +```bash +$ ck benchmark program:image-classification-onnx-py --cmd_key=preprocessed \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 +... +Batch 1 of 2 +Batch loaded in 0.001307s +Batch classified in 0.186297s + +Batch 2 of 2 +Batch loaded in 0.000721s +Batch classified in 0.029533s +... +Summary: +------------------------------- +Graph loaded in 0.018409s +All images loaded in 0.002028s +All images classified in 0.029533s +Average classification time: 0.029533s +Accuracy top 1: 0.5 (1 of 2) +Accuracy top 5: 1.0 (2 of 2) +-------------------------------- +``` + +### Benchmark the accuracy +```bash +$ ck benchmark program:image-classification-onnx-py --cmd_key=preprocessed \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=50000 \ +--record --record_repo=local --record_uoa=mlperf-image-classification-mobilenet-onnx-py-accuracy \ +--tags=mlperf,image-classification,mobilenet,onnx-py,accuracy \ +--skip_print_timers --skip_stat_analysis --process_multi_keys +``` +**NB:** For the `imagenet-2012-val-min` dataset, change `--env.CK_BATCH_COUNT=50000` +to `--env.CK_BATCH_COUNT=500` (or drop completely to test on a single image as if +with `--env.CK_BATCH_COUNT=1`). + + + +## Reference accuracy +**TODO** + + +## Further information + +### Using Collective Knowledge +See the [common instructions](../README.md) for information on how to use Collective Knowledge +to learn about [the anatomy of a benchmark](../README.md#anatomy), or +to inspect and visualize [experimental results](../README.md#results). + +### Using the client program + +See [`ck-mlperf:program:image-classification-onnx-py`](https://github.com/ctuning/ck-mlperf/tree/master/program/image-classification-onnx-py) for more details about the client program. diff --git a/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/tf-cpp/README.md b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/tf-cpp/README.md new file mode 100644 index 000000000..75d1c6c85 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/tf-cpp/README.md @@ -0,0 +1,367 @@ +# Image Classification via TensorFlow (C++) + +1. [Installation instructions](#installation) +2. [Benchmarking instructions](#benchmarking) +3. [Reference accuracy](#accuracy) +4. 
[Further information](#further-info) + + + +## Installation instructions + +Please follow the [common installation instructions](../README.md#installation) first. + +### Install TensorFlow (C++) + +Install TensorFlow (C++) v1.13.1 from source: +```bash +$ ck install package:lib-tensorflow-1.13.1-src-static [--target_os=android23-arm64] +``` +**NB:** The ResNet model has a [known issue with v1.14.0](https://github.com/ctuning/ck-tensorflow/blob/master/package/lib-tensorflow-1.14.0-src-static/README.md). + +### Install models for TensorFlow (C++) + +#### ResNet + +To install the [ResNet50-v1.5 model](https://zenodo.org/record/2535873): +```bash +$ ck install package --tags=model,tf,mlperf,resnet +``` + +#### MobileNet +To select interactively from one of the non-quantized and quantized MobileNets-v1-1.0-224 models: +``` +$ ck install package --tags=model,tf,mlperf,mobilenet +``` + +##### MobileNet non-quantized + +To install the non-quantized MobileNet model from: +- [zenodo.org](https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224.tgz) (default): +```bash +$ ck install package --tags=model,tf,mlperf,mobilenet,non-quantized,from-zenodo +``` +- [tensorflow.org](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz) +```bash +$ ck install package --tags=model,tf,mlperf,mobilenet,non-quantized,from-google +``` + +##### MobileNet quantized + +To install the quantized MobileNet model from: +- [zenodo.org](https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224_quant.tgz) (default): +```bash +$ ck install package --tags=model,tf,mlperf,mobilenet,quantized,from-zenodo +``` +- [tensorflow.org](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz) +```bash +$ ck install package --tags=model,tf,mlperf,mobilenet,quantized,from-google +``` + +##### Bonus: other MobileNets models + +You can also install any other MobileNets model compatible with TensorFlow (C++) as follows: +```bash +$ ck install package --tags=tensorflowmodel,mobilenet,frozen --no_tags=mobilenet-all +``` + +### Compile the TensorFlow (C++) Image Classification client + +Compile the client. (For Android, append e.g. `--target_os=android23-arm64` to the command.) + +```bash +$ ck compile program:image-classification-tf-cpp --speed +``` + +### Run the TensorFlow (C++) Image Classification client + +Run the client. (For Android, connect an Android device to your host machine via USB and append e.g. `--target_os=android23-arm64` to the command). + +If you have preprocessed input data using more than one method (OpenCV, Pillow or TensorFlow), you need to select the particular preprocessed dataset. Note that the TensorFlow preprocessing method is not applicable to the MobileNet models. + +#### ResNet + +##### OpenCV preprocessing +```bash +$ ck run program:image-classification-tf-cpp \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=resnet +... +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.95 - (65) n01751748 sea snake +0.01 - (58) n01737021 water snake +0.01 - (54) n01729322 hognose snake, puff adder, sand viper +0.01 - (66) n01753488 horned viper, cerastes, sand viper, horn... +0.00 - (60) n01740131 night snake, Hypsiglena torquata +--------------------------------------- +``` + +##### Pillow preprocessing +```bash +$ ck run program:image-classification-tf-cpp \ +--dep_add_tags.images=preprocessed,using-pillow --dep_add_tags.weights=resnet +... 
+--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.95 - (65) n01751748 sea snake +0.02 - (58) n01737021 water snake +0.01 - (54) n01729322 hognose snake, puff adder, sand viper +0.01 - (60) n01740131 night snake, Hypsiglena torquata +0.01 - (66) n01753488 horned viper, cerastes, sand viper, horn... +--------------------------------------- +``` + +##### TensorFlow preprocessing +```bash +$ ck run program:image-classification-tf-cpp \ +--dep_add_tags.images=preprocessed,using-tf --dep_add_tags.weights=resnet +... +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.95 - (65) n01751748 sea snake +0.01 - (54) n01729322 hognose snake, puff adder, sand viper +0.01 - (58) n01737021 water snake +0.01 - (66) n01753488 horned viper, cerastes, sand viper, horn... +0.00 - (60) n01740131 night snake, Hypsiglena torquata +--------------------------------------- +``` + +#### MobileNet non-quantized + +##### OpenCV preprocessing +```bash +$ ck run program:image-classification-tf-cpp \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,non-quantized +... +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.86 - (65) n01751748 sea snake +0.05 - (58) n01737021 water snake +0.04 - (34) n01665541 leatherback turtle, leatherback, leather... +0.01 - (54) n01729322 hognose snake, puff adder, sand viper +0.01 - (57) n01735189 garter snake, grass snake +--------------------------------------- +``` + +##### Pillow preprocessing +```bash +$ ck run program:image-classification-tf-cpp \ +--dep_add_tags.images=preprocessed,using-pillow --dep_add_tags.weights=mobilenet,non-quantized +... +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.87 - (65) n01751748 sea snake +0.06 - (34) n01665541 leatherback turtle, leatherback, leather... +0.04 - (58) n01737021 water snake +0.01 - (54) n01729322 hognose snake, puff adder, sand viper +0.01 - (57) n01735189 garter snake, grass snake +--------------------------------------- +``` + +##### TensorFlow preprocessing (**NOT APPLICABLE!**) +```bash +$ ck run program:image-classification-tf-cpp \ +--dep_add_tags.images=preprocessed,using-tf --dep_add_tags.weights=mobilenet,non-quantized +... +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.67 - (616) n03627232 knot +0.08 - (584) n03476684 hair slide +0.06 - (488) n02999410 chain +0.02 - (792) n04208210 shovel +0.02 - (549) n03291819 envelope +--------------------------------------- +``` + +#### MobileNet quantized + +##### OpenCV preprocessing +```bash +$ ck run program:image-classification-tf-cpp \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,quantized +... +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.91 - (65) n01751748 sea snake +0.05 - (58) n01737021 water snake +0.03 - (34) n01665541 leatherback turtle, leatherback, leather... +0.01 - (54) n01729322 hognose snake, puff adder, sand viper +0.00 - (57) n01735189 garter snake, grass snake +--------------------------------------- +``` + +##### Pillow preprocessing +```bash +$ ck run program:image-classification-tf-cpp \ +--dep_add_tags.images=preprocessed,using-pillow --dep_add_tags.weights=mobilenet,quantized +... 
+--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.92 - (65) n01751748 sea snake +0.04 - (58) n01737021 water snake +0.02 - (34) n01665541 leatherback turtle, leatherback, leather... +0.00 - (390) n02526121 eel +0.00 - (54) n01729322 hognose snake, puff adder, sand viper +--------------------------------------- +``` + +##### TensorFlow preprocessing (**NOT APPLICABLE!**) +```bash +$ ck run program:image-classification-tf-cpp \ +--dep_add_tags.images=preprocessed,using-tf --dep_add_tags.weights=mobilenet,quantized +... +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.55 - (616) n03627232 knot +0.39 - (488) n02999410 chain +0.01 - (71) n01770393 scorpion +0.01 - (310) n02219486 ant, emmet, pismire +0.01 - (695) n03874599 padlock +--------------------------------------- +``` + + +## Benchmarking instructions + +### Benchmark the performance + +**NB:** When using the batch count of **N**, the program classifies **N** images, but +the slow first run is not taken into account when computing the average +classification time e.g.: +```bash +$ ck benchmark program:image-classification-tf-cpp \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 +... +Processing batches... + +Batch 1 of 2 +Batch loaded in 0.00341696 s +Batch classified in 0.355268 s + +Batch 2 of 2 +Batch loaded in 0.00335902 s +Batch classified in 0.0108837 s +... +Summary: +------------------------------- +Graph loaded in 0.053440s +All images loaded in 0.006776s +All images classified in 0.366151s +Average classification time: 0.010884s +Accuracy top 1: 0.5 (1 of 2) +Accuracy top 5: 1.0 (2 of 2) +-------------------------------- +``` + +#### ResNet +``` +$ ck benchmark program:image-classification-tf-cpp --speed \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tf-cpp-performance-using-opencv-resnet \ +--tags=mlperf,image-classification,tf-cpp,performance,using-opencv,resnet \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=resnet +``` + +#### MobileNet non-quantized +``` +$ ck benchmark program:image-classification-tf-cpp --speed \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tf-cpp-performance-using-opencv-mobilenet-non-quantized \ +--tags=mlperf,image-classification,tf-cpp,performance,using-opencv,mobilenet,non-quantized \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,non-quantized +``` + +#### MobileNet quantized +``` +$ ck benchmark program:image-classification-tf-cpp --speed \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tf-cpp-performance-using-opencv-mobilenet-quantized \ +--tags=mlperf,image-classification,tf-cpp,performance,using-opencv,mobilenet,quantized \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,quantized +``` + + +### Benchmark the accuracy + +**NB:** For the `imagenet-2012-val-min` dataset, change `--env.CK_BATCH_COUNT=50000` +to `--env.CK_BATCH_COUNT=500` (or drop completely to test on a single image as if +with `--env.CK_BATCH_COUNT=1`). 
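+
+For such a quick 500-image smoke test, a shortened (illustrative) variant of the ResNet command below could be:
+```bash
+$ ck benchmark program:image-classification-tf-cpp \
+--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=500 \
+--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=resnet
+```
+(Dropping the `--record*` flags simply means the run is not recorded to the local experiment repository.)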
+ +#### ResNet +```bash +$ ck benchmark program:image-classification-tf-cpp --speed \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=50000 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tf-cpp-accuracy-using-opencv-resnet \ +--tags=mlperf,image-classification,tf-cpp,accuracy,using-opencv,resnet \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=resnet +``` + +#### MobileNet non-quantized +```bash +$ ck benchmark program:image-classification-tf-cpp --speed \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=50000 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tf-cpp-accuracy-using-opencv-mobilenet-non-quantized \ +--tags=mlperf,image-classification,tf-cpp,accuracy,using-opencv,mobilenet,non-quantized \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,non-quantized +``` + +#### MobileNet quantized +```bash +$ ck benchmark program:image-classification-tf-cpp --speed \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=50000 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tf-cpp-accuracy-using-opencv-mobilenet-quantized \ +--tags=mlperf,image-classification,tf-cpp,accuracy,using-opencv,mobilenet,quantized \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,quantized +``` + + + +## Reference accuracy + +### Example: universal OpenCV preprocessing (default), MobileNet non-quantized +```bash +$ ck benchmark program:image-classification-tf-cpp \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=50000 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tf-cpp-accuracy-using-opencv-mobilenet-non-quantized \ +--tags=mlperf,image-classification,tf-cpp,accuracy,using-opencv,mobilenet,non-quantized \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,non-quantized +``` + +### ImageNet validation dataset (50,000 images) + +```bash +$ ck benchmark program:image-classification-tf-cpp \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=50000 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--dep_add_tags.images=preprocessed,using-opencv \ +--record_uoa=mlperf-image-classification-tf-cpp-accuracy \ +--tags=mlperf,image-classification,tf-cpp,accuracy +``` + +| Model | Metric | Pillow | OpenCV universal | OpenCV for MobileNet | OpenCV for ResNet | TensorFlow | +|-|-|-|-|-|-|-| +| ResNet | Top1 | 0.76170 | 0.76422 | N/A | 0.76456 | 0.76522 | +| | Top5 | 0.92866 | 0.93074 | N/A | 0.93016 | 0.93066 | +| MobileNet non-quantized | Top1 | 0.71226 | 0.71676 | 0.71676 | N/A | N/A | +| | Top5 | 0.89834 | 0.90118 | 0.90118 | N/A | N/A | +| MobileNet quantized | Top1 | 0.70348 | 0.70700 | 0.70694 | N/A | N/A | +| | Top5 | 0.89376 | 0.89594 | 0.89594 | N/A | N/A | + + + +## Further information + +### Using Collective Knowledge +See the [common instructions](../README.md) for information on how to use Collective Knowledge +to learn about [the anatomy of a benchmark](../README.md#anatomy), or +to inspect and visualize [experimental results](../README.md#results). 
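+
+For example, once an accuracy run has been recorded with one of the `--record_uoa` values above, the Top-1/Top-5 figures can be pulled out of the corresponding experiment entry with the same `grep` trick shown in the common instructions (the entry name below assumes the ResNet accuracy command above):
+```bash
+$ grep \"accuracy_top `ck find local:experiment:mlperf-image-classification-tf-cpp-accuracy-using-opencv-resnet`/ckp-*.json
+```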
+ +### Using the client program +See [`ck-tensorflow:program:image-classification-tf-cpp`](https://github.com/ctuning/ck-tensorflow/tree/master/program/image-classification-tf-cpp) for more details about the client program. diff --git a/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/tf-py/README.md b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/tf-py/README.md new file mode 100644 index 000000000..cfc5c7632 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/tf-py/README.md @@ -0,0 +1,181 @@ +# Image Classification via TensorFlow (Python) + +1. [Installation instructions](#installation) +2. [Benchmarking instructions](#benchmarking) +3. [Reference accuracy](#accuracy) +4. [Further information](#further-info) + + + +## Installation instructions + +Please follow the [common installation instructions](../README.md#installation) first. + +### Install TensorFlow (Python) + +Install TensorFlow (Python) from an `x86_64` binary package: +``` +$ ck install package:lib-tensorflow-1.13.1-cpu +``` +or from source: +``` +$ ck install package:lib-tensorflow-1.13.1-src-cpu +``` + +### Install the MobileNet model for TensorFlow (Python) + +To select interactively from one of the non-quantized and quantized MobileNets-v1-1.0-224 models: +``` +$ ck install package --tags=model,tf,mlperf,mobilenet +``` + +To install the [non-quantized model](https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224.tgz) directly: +``` +$ ck install package --tags=model,tf,mlperf,mobilenet,non-quantized +``` + +To install the [quantized model](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz) directly: +``` +$ ck install package --tags=model,tf,mlperf,mobilenet,quantized +``` + +#### Bonus + +##### Install other MobileNets models +You can also install any other MobileNets model compatible with TensorFlow (Python) as follows: +``` +$ ck install package --tags=tensorflowmodel,mobilenet --no_tags=mobilenet-all +``` +**NB:** This excludes "uber" packages which can be used to install all models in the sets `v1-2018-02-22` (16 models), `v1[-2018-06-14]` (16 models) and `v2` (22 models) in one go: +``` +$ ck search package --tags=tensorflowmodel,mobilenet-all +ck-tensorflow:package:tensorflowmodel-mobilenet-v1-2018_02_22 +ck-tensorflow:package:tensorflowmodel-mobilenet-v2 +ck-tensorflow:package:tensorflowmodel-mobilenet-v1 +``` + +### Run the TensorFlow (Python) Image Classification client + +Run the client: + +- with the non-quantized model: +``` +$ ck run program:image-classification-tf-py +... +*** Dependency 4 = weights (TensorFlow-Python model and weights): +... + Resolved. CK environment UID = f934f3a3faaf4d73 (version 1_1.0_224_2018_02_22) +... +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.82 - (65) n01751748 sea snake +0.10 - (58) n01737021 water snake +0.04 - (34) n01665541 leatherback turtle, leatherback, leather... 
+0.01 - (54) n01729322 hognose snake, puff adder, sand viper +0.01 - (57) n01735189 garter snake, grass snake +--------------------------------------- + +Summary: +------------------------------- +Graph loaded in 0.855126s +All images loaded in 0.001089s +All images classified in 0.116698s +Average classification time: 0.116698s +Accuracy top 1: 1.0 (1 of 1) +Accuracy top 5: 1.0 (1 of 1) +-------------------------------- +``` + +- with the quantized model: +``` +$ ck run program:image-classification-tf-py +... +*** Dependency 4 = weights (TensorFlow-Python model and weights): +... + Resolved. CK environment UID = b18ad885d440dc77 (version 1_1.0_224_quant_2018_08_02) +... +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.16 - (60) n01740131 night snake, Hypsiglena torquata +0.10 - (600) n03532672 hook, claw +0.07 - (58) n01737021 water snake +0.05 - (398) n02666196 abacus +0.05 - (79) n01784675 centipede +--------------------------------------- + +Summary: +------------------------------- +Graph loaded in 1.066851s +All images loaded in 0.001507s +All images classified in 0.178281s +Average classification time: 0.178281s +Accuracy top 1: 0.0 (0 of 1) +Accuracy top 5: 0.0 (0 of 1) +-------------------------------- +``` + + +## Benchmarking instructions + +### Benchmark the performance +``` +$ ck benchmark program:image-classification-tf-py \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 \ +--record --record_repo=local --record_uoa=mlperf-image-classification-mobilenet-tf-py-performance \ +--tags=mlperf,image-classification,mobilenet,tf-py,performance \ +--skip_print_timers --skip_stat_analysis --process_multi_keys +``` + +**NB:** When using the batch count of **N**, the program classifies **N** images, but +the slow first run is not taken into account when computing the average +classification time e.g.: +```bash +$ ck benchmark program:image-classification-tf-py \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 +... +Weights loaded in 0.293122s + +Batch 1 of 2 +Batch loaded in 0.001036s +Batch classified in 0.121501s + +Batch 2 of 2 +Batch loaded in 0.001257s +Batch classified in 0.013995s +... +Summary: +------------------------------- +Graph loaded in 1.115745s +All images loaded in 0.002293s +All images classified in 0.013995s +Average classification time: 0.013995s +Accuracy top 1: 0.5 (1 of 2) +Accuracy top 5: 1.0 (2 of 2) +-------------------------------- +``` + +### Benchmark the accuracy +```bash +$ ck benchmark program:image-classification-tf-py \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=50000 \ +--record --record_repo=local --record_uoa=mlperf-image-classification-mobilenet-tf-py-accuracy \ +--tags=mlperf,image-classification,mobilenet,tf-py,accuracy \ +--skip_print_timers --skip_stat_analysis --process_multi_keys +``` +**NB:** For the `imagenet-2012-val-min` dataset, change `--env.CK_BATCH_COUNT=50000` +to `--env.CK_BATCH_COUNT=500` (or drop completely to test on a single image as if +with `--env.CK_BATCH_COUNT=1`). + + + +## Reference accuracy +**TODO** + + +## Further information + +### Using Collective Knowledge +See the [common instructions](../README.md) for information on how to use Collective Knowledge +to learn about [the anatomy of a benchmark](../README.md#anatomy), or +to inspect and visualize [experimental results](../README.md#results). 
+ +### Using the client program +See [`ck-tensorflow:program:image-classification-tf-py`](https://github.com/ctuning/ck-tensorflow/tree/master/program/image-classification-tf-py) for more details about the client program. diff --git a/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/tflite/README.md b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/tflite/README.md new file mode 100644 index 000000000..f6933dd9c --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/classification/tflite/README.md @@ -0,0 +1,320 @@ +# Image Classification via TensorFlow Lite (TFLite) + +1. [Installation instructions](#installation) +2. [Benchmarking instructions](#benchmarking) +3. [Reference accuracy](#accuracy) +4. [Further information](#further-info) + + + +## Installation instructions + +Please follow the [common installation instructions](../README.md#installation) first. + +### Install TFLite + +Install TFLite v1.13.1 from source: +``` +$ ck install package --tags=lib,tflite,v1.13.1,vsrc [--target_os=android23-arm64] +``` +**NB:** TFLite v1.14.0 has [many known issues on Arm platforms](https://github.com/ctuning/ck-tensorflow/blob/master/package/lib-tflite-1.14.0-src-static/README.md), and does not work for Android yet. + +You can also install TFLite v0.1.7 from a prebuilt binary package for your target e.g.: +``` +$ ck list package:lib-tflite-prebuilt* +lib-tflite-prebuilt-0.1.7-linux-aarch64 +lib-tflite-prebuilt-0.1.7-linux-x64 +lib-tflite-prebuilt-0.1.7-android-arm64 +$ ck install package:lib-tflite-prebuilt-0.1.7-android-arm64 [--target_os=android23-arm64] +``` + +**NB:** Please [let us know](info@dividiti.com) if you would like us to create +prebuilt packages for TFLite 1.13.1. + + +### Install the models for TFLite + +#### ResNet + +To install the ResNet50-v1.5 model: +```bash +$ ck install package --tags=model,tflite,mlperf,resnet + +More than one package or version found: + + 0) model-tflite-mlperf-resnet-no-argmax Version 1.5 (afb43014ef38f646) + 1) model-tflite-mlperf-resnet Version 1.5 (d60d4e9a84151271) + 2) model-tflite-convert-from-tf (35e84375ac48dcb1), Variations: resnet + +Please select the package to install [ hit return for "0" ]: +``` + +Option 0 will download a TFLite model preconverted from the TF model. During +the conversion, the `ArgMax` operator causing an +[issue](https://github.com/ARM-software/armnn/issues/150) with ArmNN v19.02 +and v19.05 was excluded. + +Option 1 will download a TFLite model preconverted from the TF model, but +including the `ArgMax` operator. This variant can be used with ArmNN once +the above issue is resolved. + +Option 2 will download the TF model and convert it to TFLite, while excluding +the `ArgMax` operator. + + +#### MobileNet + +To select interactively from one of the non-quantized and quantized +MobileNets-v1-1.0-224 models: +``` +$ ck install package --tags=model,tflite,mlperf,mobilenet + +More than one package or version found: + + 0) model-tf-mlperf-mobilenet Version 1_1.0_224_2018_08_02 (05c4dcbbbf872ecf) + 1) model-tf-mlperf-mobilenet-quantized Version 1_1.0_224_quant_2018_08_02 (3013bdc96184bf3b) + 2) model-tflite-convert-from-tf (35e84375ac48dcb1), Variations: mobilenet + +Please select the package to install [ hit return for "0" ]: +``` +Options 0 and 1 will download the official non-quantized and quantized models. +Option 2 will download the official TF model and convert it to TFLite. 
+ +**NB:** Option 2 is only viable on x86 platforms, as it depends on using a +prebuilt version of TF. While this constraint could be relaxed to use a +version of TF built from source, building TF from source takes a long time on +Arm platforms (as well as [not being officially +supported](https://github.com/tensorflow/tensorflow/issues/25607#issuecomment-466583730)). + +##### MobileNet non-quantized + +To install the non-quantized MobileNet model from: +- [zenodo.org](https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224.tgz) (default): +```bash +$ ck install package --tags=model,tflite,mlperf,mobilenet,non-quantized,from-zenodo +``` +- [tensorflow.org](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz) +```bash +$ ck install package --tags=model,tflite,mlperf,mobilenet,non-quantized,from-google +``` + +##### MobileNet quantized + +To install the quantized MobileNet model from: +- [zenodo.org](https://zenodo.org/record/2269307/files/mobilenet_v1_1.0_224_quant.tgz) (default): +```bash +$ ck install package --tags=model,tflite,mlperf,mobilenet,quantized,from-zenodo +``` +- [tensorflow.org](http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz) +```bash +$ ck install package --tags=model,tflite,mlperf,mobilenet,quantized,from-google +``` + +#### Bonus: other MobileNets models +You can also install any other MobileNets model compatible with TFLite as follows: +``` +$ ck install package --tags=tensorflowmodel,mobilenet,tflite +``` + +### Compile the TFLite Image Classification client + +Compile the client. (For Android, append e.g. `--target_os=android23-arm64` to the command.) + +```bash +$ ck compile program:image-classification-tflite --speed +``` + +### Run the TFLite Image Classification client + +Run the client. (For Android, connect an Android device to your host machine via USB and append e.g. `--target_os=android23-arm64` to the command). + +If you have preprocessed input data using more than one method (OpenCV, Pillow or TensorFlow), you need to select the particular preprocessed dataset. Note that the TensorFlow preprocessing method is not applicable to the MobileNet models. + +#### ResNet + +##### OpenCV preprocessing +```bash +$ ck run program:image-classification-tflite \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=resnet +... +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.95 - (65) n01751748 sea snake +0.01 - (58) n01737021 water snake +0.01 - (54) n01729322 hognose snake, puff adder, sand viper +0.01 - (66) n01753488 horned viper, cerastes, sand viper, horn... +0.00 - (60) n01740131 night snake, Hypsiglena torquata +--------------------------------------- +``` + +#### MobileNet non-quantized + +##### OpenCV preprocessing +```bash +$ ck run program:image-classification-tflite \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,non-quantized +... +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.86 - (65) n01751748 sea snake +0.05 - (58) n01737021 water snake +0.04 - (34) n01665541 leatherback turtle, leatherback, leather... 
+0.01 - (54) n01729322 hognose snake, puff adder, sand viper +0.01 - (57) n01735189 garter snake, grass snake +--------------------------------------- +``` + +#### MobileNet quantized + +##### OpenCV preprocessing +```bash +$ ck run program:image-classification-tflite \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,quantized +... +--------------------------------------- +ILSVRC2012_val_00000001.JPEG - (65) n01751748 sea snake +0.88 - (65) n01751748 sea snake +0.07 - (34) n01665541 leatherback turtle, leatherback, leather... +0.03 - (58) n01737021 water snake +0.00 - (54) n01729322 hognose snake, puff adder, sand viper +0.00 - (0) n01440764 tench, Tinca tinca +--------------------------------------- +``` +**NB:** The prediction from `tflite` differs from that from `tf-cpp`. + + +## Benchmarking instructions + +### Benchmark the performance + +**NB:** When using the batch count of **N**, the program classifies **N** images, but +the slow first run is not taken into account when computing the average +classification time e.g.: +``` +$ ck benchmark program:image-classification-tflite \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 +... +Processing batches... + +Batch 1 of 2 + +Batch loaded in 0.00802251 s +Batch classified in 0.16831 s + +Batch 2 of 2 + +Batch loaded in 0.00776105 s +Batch classified in 0.0762354 s +... +Summary: +------------------------------- +Graph loaded in 0.000663s +All images loaded in 0.015784s +All images classified in 0.244545s +Average classification time: 0.076235s +Accuracy top 1: 0.5 (1 of 2) +Accuracy top 5: 1.0 (2 of 2) +-------------------------------- +``` + +#### ResNet +``` +$ ck benchmark program:image-classification-tflite --speed \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tflite-performance-using-opencv-resnet \ +--tags=mlperf,image-classification,tflite,performance,using-opencv,resnet \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=resnet +``` + +#### MobileNet non-quantized +``` +$ ck benchmark program:image-classification-tflite --speed \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tflite-performance-using-opencv-mobilenet-non-quantized \ +--tags=mlperf,image-classification,tflite,performance,using-opencv,mobilenet,non-quantized \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,non-quantized +``` + +#### MobileNet quantized +``` +$ ck benchmark program:image-classification-tflite --speed \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tflite-performance-using-opencv-mobilenet-quantized \ +--tags=mlperf,image-classification,tflite,performance,using-opencv,mobilenet,quantized \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,quantized +``` + +### Benchmark the accuracy + +**NB:** For the `imagenet-2012-val-min` dataset, change `--env.CK_BATCH_COUNT=50000` +to `--env.CK_BATCH_COUNT=500` (or drop completely to test on a single image as if +with `--env.CK_BATCH_COUNT=1`). 
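+
+For reference, the Top-1 and Top-5 metrics reported by the accuracy runs below
+can be computed from per-image probabilities and ground-truth labels roughly as
+in the following sketch (illustrative only, not the CK implementation; `probs`,
+`labels` and `topk_accuracy` are placeholder names, and models with an extra
+background class additionally need a fixed index offset):
+```python
+import numpy as np
+
+def topk_accuracy(probs, labels, k=5):
+    """Fraction of images whose true label is among the k highest-scoring classes."""
+    # probs: (N, num_classes) scores; labels: (N,) ground-truth class indices
+    topk = np.argsort(probs, axis=1)[:, -k:]
+    hits = [label in row for row, label in zip(topk, labels)]
+    return float(np.mean(hits))
+
+# top1 = topk_accuracy(probs, labels, k=1)
+# top5 = topk_accuracy(probs, labels, k=5)
+```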
+ +#### ResNet +```bash +$ ck benchmark program:image-classification-tflite --speed \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=50000 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tflite-accuracy-using-opencv-resnet \ +--tags=mlperf,image-classification,tflite,accuracy,using-opencv,resnet \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=resnet +``` + +#### MobileNet non-quantized +```bash +$ ck benchmark program:image-classification-tflite --speed \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=50000 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tflite-accuracy-using-opencv-mobilenet-non-quantized \ +--tags=mlperf,image-classification,tflite,accuracy,using-opencv,mobilenet,non-quantized \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,non-quantized +``` + +#### MobileNet quantized +```bash +$ ck benchmark program:image-classification-tflite --speed \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=50000 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tflite-accuracy-using-opencv-mobilenet-quantized \ +--tags=mlperf,image-classification,tflite,accuracy,using-opencv,mobilenet,quantized \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,quantized +``` + + + +## Reference accuracy + +### Example: OpenCV preprocessing (default), MobileNet non-quantized +```bash +$ ck benchmark program:image-classification-tflite \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=50000 \ +--skip_print_timers --skip_stat_analysis --process_multi_keys --record --record_repo=local \ +--record_uoa=mlperf-image-classification-tflite-accuracy-using-opencv-mobilenet-non-quantized \ +--tags=mlperf,image-classification,tflite,accuracy,using-opencv,mobilenet,non-quantized \ +--dep_add_tags.images=preprocessed,using-opencv --dep_add_tags.weights=mobilenet,non-quantized +``` + +### ImageNet validation dataset (50,000 images) + +| Model | Metric | Pillow | OpenCV universal | OpenCV for MobileNet | OpenCV for ResNet | TensorFlow | +|-|-|-|-|-|-|-| +| ResNet | Top1 | 0.76170 | 0.76422 | N/A | 0.76456 | 0.76522 | +| | Top5 | 0.92866 | 0.93074 | N/A | 0.93016 | 0.93066 | +| MobileNet non-quantized | Top1 | 0.71226 | 0.71676 | 0.71676 | N/A | N/A | +| | Top5 | 0.89834 | 0.90118 | 0.90118 | N/A | N/A | +| MobileNet quantized | Top1 | 0.70502 | 0.70762 | N/A ([bug?](https://github.com/ctuning/ck-mlperf/issues/40)) | N/A | N/A | +| | Top5 | 0.89118 | 0.89266 | N/A ([bug?](https://github.com/ctuning/ck-mlperf/issues/40)) | N/A | N/A | + + + +## Further information +### Using Collective Knowledge +See the [common instructions](../README.md) for information on how to use Collective Knowledge +to learn about [the anatomy of a benchmark](../README.md#anatomy), or +to inspect and visualize [experimental results](../README.md#results). + +### Using the client program +See [`ck-tensorflow:program:image-classification-tflite`](https://github.com/ctuning/ck-tensorflow/tree/master/program/image-classification-tflite) for more details about the client program. 
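+
+### A note on preprocessing
+
+The differences between the preprocessing variants in the table above come mainly
+from resizing and cropping details (library and interpolation choices), while the
+per-model normalization is fixed. As a rough illustration (mirroring the reference
+Python preprocessing elsewhere in this repository, with placeholder function names),
+MobileNet inputs are scaled to `[-1, 1]`, whereas ResNet inputs have per-channel
+ImageNet means subtracted:
+```python
+import numpy as np
+
+def normalize_mobilenet(img):
+    # MobileNet-style: scale RGB pixel values to [-1, 1]
+    img = np.asarray(img, dtype=np.float32)
+    return (img / 255.0 - 0.5) * 2.0
+
+def normalize_resnet(img):
+    # ResNet/VGG-style: subtract the per-channel ImageNet means
+    img = np.asarray(img, dtype=np.float32)
+    return img - np.array([123.68, 116.78, 103.94], dtype=np.float32)
+```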
diff --git a/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/detection/README.md b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/detection/README.md new file mode 100644 index 000000000..7a7cf3321 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/detection/README.md @@ -0,0 +1,177 @@ +[![compatibility](https://github.com/ctuning/ck-guide-images/blob/master/ck-compatible.svg)](https://github.com/ctuning/ck) +[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) + +# MLPerf Inference - Object Detection + +MLPerf Inference v0.5 uses SSD-MobileNet-v1-1.0-224 (called SSD-MobileNet in what follows) and SSD-ResNet34 (called SSD-ResNet in what follows). + +# Table of contents + +1. [Installation](#installation) + - [Install prerequisites](#installation-debian) (Debian-specific) + - [Install CK workflows](#installation-workflows) (universal) +1. [Benchmarking](#benchmarking) + - [via TensorFlow (Python)](tf-py/README.md) + - [via TensorFlow Lite](tflite/README.md) + + +# Installation + +**NB:** If you would like to get a feel of CK workflows, you can skip +installation instructions and try [benchmarking](#benchmarking) +instructions on available Docker images: +- TensorFlow Lite: + - [Debian 9](https://github.com/ctuning/ck-mlperf/tree/master/docker/object-detection-tflite.debian-9) +- Arm NN: + - [Debian 9](https://github.com/ARM-software/armnn-mlperf/tree/master/docker/object-detection-armnn-tflite.debian-9) + +Even if you would like to run CK workflows natively (e.g. on an Arm-based +development board or Android phone), you may wish to have a quick look into the +latest Dockerfile's to check for latest updates e.g. system-specific +dependencies. + + +## Debian + +- Common tools and libraries. +- [Python](https://www.python.org/), [pip](https://pypi.org/project/pip/), [SciPy](https://www.scipy.org/), [Collective Knowledge](https://cknowledge.org) (CK). +- (Optional) [Android SDK](https://developer.android.com/studio/), [Android NDK](https://developer.android.com/ndk/). + +### Install common tools and libraries +```bash +$ sudo apt install git wget libz-dev curl cmake +$ sudo apt install gcc g++ autoconf autogen libtool +``` + +### Install Python 3 and the latest pip +```bash +$ sudo apt install python3 python3-pip +$ sudo python3 -m pip install --upgrade pip +``` +**NB:** Python 3 is needed for the [COCO API](https://github.com/cocodataset/cocoapi) +used to evaluate object detection accuracy on the [COCO dataset](http://cocodataset.org). + +**NB:** Care must be taken not to mix Python 3 and Python 2 packages. +If your system uses Python 2 by default, we recommend you prefix +all CK commands, for example, with `CK_PYTHON=python3` for CK to run under Python 3: +``` +$ python --version +Python 2.7.13 +$ ck python_version +2.7.13 (default, Sep 26 2018, 18:42:22) +[GCC 6.3.0 20170516] +$ CK_PYTHON=python3 ck python_version +3.5.3 (default, Sep 27 2018, 17:25:39) +[GCC 6.3.0 20170516] +``` +Similarly, if you use multiple Python 3 versions (e.g. 3.5 and 3.6), we recommend +you stick to one of them for consistency: +``` +$ CK_PYTHON=python3.5 ck python_version +3.5.2 (default, Nov 12 2018, 13:43:14) +[GCC 5.4.0 20160609] +$ CK_PYTHON=python3.6 ck python_version +3.6.7 (default, Oct 25 2018, 09:16:13) +[GCC 5.4.0 20160609] +``` + +### Install required Python 3 packages +Choose one of the following installation options: +1. 
system-wide via pip; +1. user-space via pip; +1. user-space via CK. + +With the first two options, packages get installed via pip and get registered +with CK later (typically, on the first run of a program). + +With the last option, packages also get installed via pip but get registered +with CK at the same time (so there is less chance of mixing things up). + +#### Option 1: system-wide installation via pip (under `/usr`) +```bash +$ sudo python3 -m pip install cython scipy==1.2.1 matplotlib pillow ck +``` +#### Option 2: user-space installation via pip (under `$HOME`) +```bash +$ python3 -m pip install cython scipy==1.2.1 matplotlib pillow ck --user +``` +#### Option 3: user-space installation via CK (under `$HOME` and `$CK_TOOLS`) +Install CK via pip (or [from GitHub](https://github.com/ctuning/ck#installation)): +```bash +$ python3 -m pip install ck --user +$ ck version +V1.9.7 +``` +Install and register Python packages with CK: +```bash +$ ck pull repo:ck-env +$ ck detect soft:compiler.python --full_path=`which python3` +$ ck install package --tags=lib,python-package,numpy +$ ck install package --tags=lib,python-package,scipy --force_version=1.2.1 +$ ck install package --tags=lib,python-package,matplotlib +$ ck install package --tags=lib,python-package,pillow +$ ck install package --tags=lib,python-package,cython +``` + +If the above dependencies have been installed on a clean system, you should be +able to inspect the registered CK environments e.g. as follows: +``` +$ ck show env --tags=python-package +Env UID: Target OS: Bits: Name: Version: Tags: + +4e82bab01c8ee3b7 linux-64 64 Python NumPy library 1.16.2 64bits,host-os-linux-64,lib,needs-python,needs-python-3.5.2,numpy,python-package,target-os-linux-64,v1,v1.16,v1.16.2,vmaster +66642698751a2fcf linux-64 64 Python SciPy library 1.2.1 64bits,host-os-linux-64,lib,needs-python,needs-python-3.5.2,python-package,scipy,target-os-linux-64,v1,v1.2,v1.2.1,vmaster +78e8a1bfb4eb052c linux-64 64 Python Matplotlib library 3.0.3 64bits,host-os-linux-64,lib,matplotlib,needs-python,needs-python-3.5.2,python-package,target-os-linux-64,v3,v3.0,v3.0.3,vmaster +a6f9c25377710f6f linux-64 64 Python Pillow library 6.0.0 64bits,PIL,host-os-linux-64,lib,needs-python,needs-python-3.5.2,pillow,python-package,target-os-linux-64,v6,v6.0,v6.0.0,vmaster +498dbe464d051b44 linux-64 64 Python Cython library 0.29.9 64bits,cython,host-os-linux-64,lib,needs-python,needs-python-3.5.2,python-package,target-os-linux-64,v0,v0.29,v0.29.9,vmaster + +$ ck cat env --tags=python-package | grep PYTHONPATH +export PYTHONPATH=/home/anton/CK_TOOLS/lib-python-numpy-compiler.python-3.5.2-linux-64/build:${PYTHONPATH} +export PYTHONPATH=/home/anton/CK_TOOLS/lib-python-scipy-compiler.python-3.5.2-linux-64/build:${PYTHONPATH} +export PYTHONPATH=/home/anton/CK_TOOLS/lib-python-matplotlib-compiler.python-3.5.2-linux-64/build:${PYTHONPATH} +export PYTHONPATH=/home/anton/CK_TOOLS/lib-python-pillow-compiler.python-3.5.2-linux-64/build:${PYTHONPATH} +export PYTHONPATH=/home/anton/CK_TOOLS/lib-python-cython-compiler.python-3.5.2-linux-64/build:${PYTHONPATH} +``` + + +## Install CK workflows + +### Pull CK repositories +```bash +$ ck pull repo:ck-mlperf +``` +**NB:** Transitive dependencies include [repo:ck-tensorflow](https://github.com/ctuning/ck-tensorflow). + +To update all CK repositories (e.g. 
after a bug fix): +``` +$ ck pull repo --all +``` + +### Install the COCO 2017 validation dataset (5,000 images) +```bash +$ ck install package --tags=object-detection,dataset,coco.2017,val,original,full +``` +**NB:** COCO dataset descriptions are in [repo:ck-env](https://github.com/ctuning/ck-env). + +**NB:** If you have previously installed the COCO 2017 validation dataset via CK to e.g. `$HOME/coco/`, you can simply detect it as follows: +```bash +$ ck detect soft:dataset.coco.2017.val --full_path=$HOME/coco/val2017/000000000139.jpg +``` +(CK also places annotations under `annotations/val2017/`.) + +### Preprocess the COCO 2017 validation dataset (first 50 images) +```bash +$ ck install package --tags=object-detection,dataset,coco.2017,preprocessed,first.50 +``` + +### Preprocess the COCO 2017 validation dataset (all 5,000 images) +```bash +$ ck install package --tags=object-detection,dataset,coco.2017,preprocessed,full +``` + + +## Benchmarking + +You can benchmark SSD-MobileNet using one of the available options: +- [via TensorFlow (Python)](tf-py/README.md) +- [via TensorFlow Lite](tflite/README.md) diff --git a/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/detection/tf-py/README.md b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/detection/tf-py/README.md new file mode 100644 index 000000000..1dd5324ce --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/detection/tf-py/README.md @@ -0,0 +1,252 @@ +# Object Detection via TensorFlow (Python) + +1. [Installation instructions](#installation) +2. [Benchmarking instructions](#benchmarking) +3. [Reference accuracy](#accuracy) +4. [Further information](#further-info) + + + +## Installation instructions + +Please follow the common [installation instructions](../README.md#installation) first. + +### Install additional Python packages in user-space +``` +$ python -m pip install gast --user +$ python -m pip install astor --user +$ python -m pip install termcolor --user +$ python -m pip install tensorflow-estimator==1.13.0 --user +$ python -m pip install keras_applications==1.0.4 --no-deps --user +$ python -m pip install keras_preprocessing==1.0.2 --no-deps --user +``` + +### Install TensorFlow (Python) + +Install TensorFlow (Python) from an `x86_64` binary package: +```bash +$ ck install package:lib-tensorflow-1.13.1-cpu +``` +or from source: +```bash +$ ck install package:lib-tensorflow-1.13.1-src-cpu +``` + +### Install the SSD-MobileNet model + +To select interactively from one of the non-quantized and quantized SSD-MobileNet models: +``` +$ ck install package --tags=model,tf,object-detection,mlperf,ssd-mobilenet +``` + +#### Install the [non-quantized model](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz) directly +```bash +$ ck install package --tags=model,tf,object-detection,mlperf,ssd-mobilenet,non-quantized +``` + +#### Install the quantized finetuned model (courtesy of [Habana](https://habana.ai/)) directly +```bash +$ ck install package --tags=model,tf,object-detection,mlperf,ssd-mobilenet,quantized,finetuned +``` + +### Run the TensorFlow (Python) Object Detection client on 50 images +```bash +$ ck run program:object-detection-tf-py --env.CK_BATCH_COUNT=50 +... +******************************************************************************** +* Process results +******************************************************************************** + +Convert results to coco ... 
+ +Evaluate metrics as coco ... +loading annotations into memory... +Done (t=0.55s) +creating index... +index created! +Loading and preparing results... +DONE (t=0.00s) +creating index... +index created! +Running per image evaluation... +Evaluate annotation type *bbox* +DONE (t=0.12s). +Accumulating evaluation results... +DONE (t=0.22s). + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.315 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.439 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.331 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.064 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.184 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.689 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.296 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.322 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.323 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.066 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.187 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.711 + +Summary: +------------------------------- +Graph loaded in 0.748657s +All images loaded in 12.052568s +All images detected in 1.251245s +Average detection time: 0.025536s +mAP: 0.3148934914889957 +Recall: 0.3225293342489256 +-------------------------------- +``` + + +## Benchmarking instructions + +### Benchmark the performance +```bash +$ ck benchmark program:object-detection-tf-py \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 --env.CK_METRIC_TYPE=COCO \ +--record --record_repo=local --record_uoa=mlperf-object-detection-ssd-mobilenet-tf-py-performance \ +--tags=mlperf,object-detection,ssd-mobilenet,tf-py,performance \ +--skip_print_timers --skip_stat_analysis --process_multi_keys +``` + +**NB:** When using the batch count of **N**, the program runs object detection +on **N** images, but the slow first run is not taken into account when +computing the average detection time e.g.: +```bash +$ ck benchmark program:object-detection-tf-py \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 +... +Graph loaded in 0.7420s + +Detect image: 000000000139.jpg (1 of 2) +Detected in 1.9351s + +Detect image: 000000000285.jpg (2 of 2) +Detected in 0.0284s +... +Summary: +------------------------------- +Graph loaded in 0.741997s +All images loaded in 0.604377s +All images detected in 0.028387s +Average detection time: 0.028387s +mAP: 0.15445544554455443 +Recall: 0.15363636363636363 +-------------------------------- +``` + +### Benchmark the accuracy +```bash +$ ck benchmark program:object-detection-tf-py \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=5000 --env.CK_METRIC_TYPE=COCO \ +--record --record_repo=local --record_uoa=mlperf-object-detection-ssd-mobilenet-tf-py-accuracy \ +--tags=mlperf,object-detection,ssd-mobilenet,tf-py,accuracy \ +--skip_print_timers --skip_stat_analysis --process_multi_keys +``` + + +## Reference accuracy + +### SSD-MobileNet non-quantized +``` +******************************************************************************** +* Process results +******************************************************************************** + +Convert results to coco ... + +Evaluate metrics as coco ... +loading annotations into memory... +Done (t=0.49s) +creating index... +index created! 
+Loading and preparing results... +DONE (t=0.07s) +creating index... +index created! +Running per image evaluation... +Evaluate annotation type *bbox* +DONE (t=12.46s). +Accumulating evaluation results... +DONE (t=2.09s). + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.231 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.349 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.252 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.018 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.166 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.531 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.209 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.262 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.263 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.023 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.190 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.604 + +Summary: +------------------------------- +Graph loaded in 0.753200s +All images loaded in 1193.981655s +All images detected in 123.461871s +Average detection time: 0.024697s +mAP: 0.23111107753357035 +Recall: 0.26304841188725403 +-------------------------------- +``` + +### SSD-MobileNet quantized finetuned +``` +******************************************************************************** +* Process results +******************************************************************************** + +Convert results to coco ... + +Evaluate metrics as coco ... +loading annotations into memory... +Done (t=0.48s) +creating index... +index created! +Loading and preparing results... +DONE (t=0.18s) +creating index... +index created! +Running per image evaluation... +Evaluate annotation type *bbox* +DONE (t=12.74s). +Accumulating evaluation results... +DONE (t=2.13s). 
+ Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.236 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.361 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.259 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.019 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.166 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.546 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.212 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.268 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.269 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.025 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.191 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.618 + +Summary: +------------------------------- +Graph loaded in 1.091316s +All images loaded in 1150.996266s +All images detected in 122.103661s +Average detection time: 0.024426s +mAP: 0.23594222525632427 +Recall: 0.26864982712779556 +-------------------------------- +``` + + + +## Further information + +### Using Collective Knowledge +See the [common MobileNet instructions](../../../object_classification/mobilenets/README.md) for information on how to use Collective Knowledge +to learn about [the anatomy of a benchmark](../../../object_clasification/mobilenets/README.md#the-anatomy-of-a-benchmark), or +to inspect and visualize [experimental results](../../../object_clasification/mobilenets/README.md#inspecting-recorded-experimental-results). + +### Using the client program +See [`ck-tensorflow:program:object-detection-tf-py`](https://github.com/ctuning/ck-tensorflow/tree/master/program/object-detection-tf-py) for more details about the client program. diff --git a/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/detection/tflite/README.md b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/detection/tflite/README.md new file mode 100644 index 000000000..9ffff4c78 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/optional_harness_ck/detection/tflite/README.md @@ -0,0 +1,183 @@ +# Object Detection via TensorFlow Lite + +1. [Installation instructions](#installation) +2. [Benchmarking instructions](#benchmarking) +3. [Reference accuracy](#accuracy) +4. [Further information](#further-info) + + + +## Installation instructions + +Please follow the common [installation instructions](../README.md#installation) first. + +### Install TensorFlow Lite (TFLite) + +Install TFLite from source: +``` +$ ck install package:lib-tflite-1.13.1-src-static [--target_os=android23-arm64] +``` + +**NB:** Currently we have no TFLite 1.13.1 prebuilt packages. +Please [let us know](info@dividiti.com) if you would like us to create some. 
+ + +### Install the SSD-MobileNet models for TFLite + +To select interactively from one of the non-quantized and quantized SSD-MobileNets-v1-1.0-224 models adopted for MLPerf Inference v0.5: +``` +$ ck install package --tags=model,tflite,object-detection,mlperf,ssd-mobilenet +``` + +To install the non-quantized model directly: +``` +$ ck install package --tags=model,tflite,object-detection,mlperf,ssd-mobilenet,non-quantized +``` +**NB:** This TFLite model has been [converted](https://github.com/ctuning/ck-mlperf/blob/master/package/model-tflite-mlperf-ssd-mobilenet/README.md) from the [original TF model](http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_coco_2018_01_28.tar.gz), by adapting instructions in [Google's blog post](https://medium.com/tensorflow/training-and-serving-a-realtime-mobile-object-detector-in-30-minutes-with-cloud-tpus-b78971cf1193). + +### Compile the TFLite Object Detection client +``` +$ ck compile program:object-detection-tflite [--target_os=android23-arm64] +``` + +### Run the TFLite Object Detection client on 50 images + +Run the client (if required, connect an Android device to your host machine via USB): +``` +$ ck run program:object-detection-tflite --env.CK_BATCH_COUNT=50 \ +[--target_os=android23-arm64] +... +******************************************************************************** +* Process results +******************************************************************************** + +Convert results to coco ... + +Evaluate metrics as coco ... +loading annotations into memory... +Done (t=0.45s) +creating index... +index created! +Loading and preparing results... +DONE (t=0.00s) +creating index... +index created! +Running per image evaluation... +Evaluate annotation type *bbox* +DONE (t=0.11s). +Accumulating evaluation results... +DONE (t=0.22s). + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.293 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.408 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.300 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.062 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.196 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.639 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.278 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.302 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.302 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.063 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.198 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.668 + +Summary: +------------------------------- +Graph loaded in 0.000000s +All images loaded in 0.000000s +All images detected in 0.000000s +Average detection time: 0.000000s +mAP: 0.2931519685807111 +Recall: 0.3022676916450782 +-------------------------------- +``` +**NB:** We are working on resolving the difference in mAP between the TF and +TFLite versions (31.5% vs. 29.3%), as well as resolving the timing issue (all +zeros). 
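+
+For reference, the AP/AR table and the final mAP above are produced by the COCO
+API (pycocotools). A minimal sketch of that evaluation step follows; the file
+names are placeholders, and the CK program prepares the annotations and the
+detections file automatically:
+```python
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+# Placeholder paths: ground-truth annotations and detections in COCO results format.
+coco_gt = COCO("annotations/instances_val2017.json")
+coco_dt = coco_gt.loadRes("detections.json")
+
+coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
+coco_eval.params.imgIds = sorted(coco_gt.getImgIds())  # restrict to the evaluated images
+coco_eval.evaluate()
+coco_eval.accumulate()
+coco_eval.summarize()   # prints the AP/AR table
+print("mAP:", coco_eval.stats[0])  # AP @ IoU=0.50:0.95, area=all, maxDets=100
+```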
+ + +## Benchmarking instructions + +### Benchmark the performance +```bash +$ ck benchmark program:object-detection-tflite \ +--repetitions=10 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=2 --env.CK_METRIC_TYPE=COCO \ +--record --record_repo=local --record_uoa=mlperf-object-detection-ssd-mobilenet-tflite-performance \ +--tags=mlperf,object-detection,ssd-mobilenet,tflite,performance \ +--skip_print_timers --skip_stat_analysis --process_multi_keys +``` +**NB:** When using the batch count of **N**, the program runs object detection +on **N** images, but the slow first run is not taken into account when +computing the average detection time. + +### Benchmark the accuracy +```bash +$ ck benchmark program:object-detection-tflite \ +--repetitions=1 --env.CK_BATCH_SIZE=1 --env.CK_BATCH_COUNT=5000 --env.CK_METRIC_TYPE=COCO \ +--record --record_repo=local --record_uoa=mlperf-object-detection-ssd-mobilenet-tflite-accuracy \ +--tags=mlperf,object-detection,ssd-mobilenet,tflite,accuracy \ +--skip_print_timers --skip_stat_analysis --process_multi_keys +``` + + +## Reference accuracy + +### SSD-MobileNet non-quantized +``` +******************************************************************************** +* Process results +******************************************************************************** + +Convert results to coco ... + +Evaluate metrics as coco ... +loading annotations into memory... +Done (t=0.50s) +creating index... +index created! +Loading and preparing results... +DONE (t=0.12s) +creating index... +index created! +Running per image evaluation... +Evaluate annotation type *bbox* +DONE (t=12.81s). +Accumulating evaluation results... +DONE (t=2.10s). + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.223 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.341 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.247 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.015 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.160 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.515 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.203 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.255 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.255 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.019 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.182 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.593 + +Summary: +------------------------------- +Graph loaded in 0.000000s +All images loaded in 0.000000s +All images detected in 0.000000s +Average detection time: 0.000000s +mAP: 0.22349680978666922 +Recall: 0.2550505369422975 +-------------------------------- +``` +**NB:** We are working on resolving the difference in mAP between the TF and +TFLite versions (23.11% vs. 22.35%), as well as resolving the timing issue (all +zeros). Both versions [use the same parameters](https://github.com/ctuning/ck-mlperf/blob/master/package/model-tflite-mlperf-ssd-mobilenet/README.md): `score_threshold=0.3`, etc. 
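+
+The `score_threshold` parameter is applied during post-processing: detections whose
+score falls below the threshold are discarded before the COCO evaluation. A simplified
+sketch (the names below are illustrative; the reference Python post-processing in this
+repository additionally assumes the scores are sorted in descending order and stops at
+the first one below the threshold):
+```python
+SCORE_THRESHOLD = 0.3
+
+def filter_detections(boxes, classes, scores, threshold=SCORE_THRESHOLD):
+    """Keep only detections with a confidence score at or above the threshold."""
+    kept = []
+    for box, cls, score in zip(boxes, classes, scores):
+        if score < threshold:
+            continue
+        kept.append((box, int(cls), float(score)))
+    return kept
+```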
+ + + +## Further information + +### Using Collective Knowledge +See the [common MobileNet instructions](../../../object_classification/mobilenets/README.md) for information on how to use Collective Knowledge +to learn about [the anatomy of a benchmark](../../../object_clasification/mobilenets/README.md#the-anatomy-of-a-benchmark), or +to inspect and visualize [experimental results](../../../object_clasification/mobilenets/README.md#inspecting-recorded-experimental-results). + +### Using the client program +See [`ck-tensorflow:program:object-detection-tflite`](https://github.com/ctuning/ck-tensorflow/tree/master/program/object-detection-tflite) for more details about the client program. diff --git a/retired_benchmarks/vision/classification_and_detection/python/__init__.py b/retired_benchmarks/vision/classification_and_detection/python/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/retired_benchmarks/vision/classification_and_detection/python/backend.py b/retired_benchmarks/vision/classification_and_detection/python/backend.py new file mode 100755 index 000000000..955eddb88 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/backend.py @@ -0,0 +1,23 @@ +""" +abstract backend class +""" + + +# pylint: disable=unused-argument,missing-docstring + +class Backend(): + def __init__(self): + self.inputs = [] + self.outputs = [] + + def version(self): + raise NotImplementedError("Backend:version") + + def name(self): + raise NotImplementedError("Backend:name") + + def load(self, model_path, inputs=None, outputs=None): + raise NotImplementedError("Backend:load") + + def predict(self, feed): + raise NotImplementedError("Backend:predict") diff --git a/retired_benchmarks/vision/classification_and_detection/python/backend_null.py b/retired_benchmarks/vision/classification_and_detection/python/backend_null.py new file mode 100755 index 000000000..ed5817070 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/backend_null.py @@ -0,0 +1,34 @@ +""" +null backend +""" + +# pylint: disable=unused-argument,missing-docstring,useless-super-delegation + +import time + +import backend + + +class BackendNull(backend.Backend): + def __init__(self): + super(BackendNull, self).__init__() + + def version(self): + return "-" + + def name(self): + return "null" + + def image_format(self): + return "NHWC" + + def load(self, model_path, inputs=None, outputs=None): + self.outputs = ["output"] + self.inputs = ["input"] + return self + + def predict(self, feed): + # yield to give the thread that feeds our queue a chance to run + time.sleep(0) + # return something fake + return [[0]] diff --git a/retired_benchmarks/vision/classification_and_detection/python/backend_onnxruntime.py b/retired_benchmarks/vision/classification_and_detection/python/backend_onnxruntime.py new file mode 100755 index 000000000..66b8fdaae --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/backend_onnxruntime.py @@ -0,0 +1,47 @@ +""" +onnxruntime backend (https://github.com/microsoft/onnxruntime) +""" + +# pylint: disable=unused-argument,missing-docstring,useless-super-delegation + +import onnxruntime as rt + +import backend + + +class BackendOnnxruntime(backend.Backend): + def __init__(self): + super(BackendOnnxruntime, self).__init__() + + def version(self): + return rt.__version__ + + def name(self): + """Name of the runtime.""" + return "onnxruntime" + + def image_format(self): + """image_format. 
For onnx it is always NCHW.""" + return "NCHW" + + def load(self, model_path, inputs=None, outputs=None): + """Load model and find input/outputs from the model file.""" + opt = rt.SessionOptions() + # enable level 3 optimizations + # FIXME: enable below once onnxruntime 0.5 is released + # opt.set_graph_optimization_level(3) + self.sess = rt.InferenceSession(model_path, opt) + # get input and output names + if not inputs: + self.inputs = [meta.name for meta in self.sess.get_inputs()] + else: + self.inputs = inputs + if not outputs: + self.outputs = [meta.name for meta in self.sess.get_outputs()] + else: + self.outputs = outputs + return self + + def predict(self, feed): + """Run the prediction.""" + return self.sess.run(self.outputs, feed) diff --git a/retired_benchmarks/vision/classification_and_detection/python/backend_pytorch.py b/retired_benchmarks/vision/classification_and_detection/python/backend_pytorch.py new file mode 100755 index 000000000..02b010ac7 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/backend_pytorch.py @@ -0,0 +1,64 @@ +""" +pytoch/caffe2 backend via onnx +https://pytorch.org/docs/stable/onnx.html +""" + +# pylint: disable=unused-argument,missing-docstring,useless-super-delegation + +from threading import Lock + +import caffe2.python.onnx.backend +import onnx +import torch # needed to get version and cuda setup + +import backend + + +class BackendPytorch(backend.Backend): + def __init__(self): + super(BackendPytorch, self).__init__() + self.sess = None + self.model = None + self.lock = Lock() + + def version(self): + return torch.__version__ + + def name(self): + return "pytorch" + + def image_format(self): + return "NCHW" + + def load(self, model_path, inputs=None, outputs=None): + self.model = onnx.load(model_path) + + # find inputs from the model if not passed in by config + if inputs: + self.inputs = inputs + else: + self.inputs = [] + initializers = set() + for i in self.model.graph.initializer: + initializers.add(i.name) + for i in self.model.graph.input: + if i.name not in initializers: + self.inputs.append(i.name) + # find outputs from the model if not passed in by config + if outputs: + self.outputs = outputs + else: + self.outputs = [] + for i in self.model.graph.output: + self.outputs.append(i.name) + + # prepare the backend + device = "CUDA:0" if torch.cuda.is_available() else "CPU" + self.sess = caffe2.python.onnx.backend.prepare(self.model, device) + return self + + def predict(self, feed): + self.lock.acquire() + res = self.sess.run(feed) + self.lock.release() + return res diff --git a/retired_benchmarks/vision/classification_and_detection/python/backend_pytorch_native.py b/retired_benchmarks/vision/classification_and_detection/python/backend_pytorch_native.py new file mode 100755 index 000000000..f631ac5d3 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/backend_pytorch_native.py @@ -0,0 +1,57 @@ +""" +pytoch native backend +""" +# pylint: disable=unused-argument,missing-docstring +import torch # currently supports pytorch1.0 +import backend + + + +class BackendPytorchNative(backend.Backend): + def __init__(self): + super(BackendPytorchNative, self).__init__() + self.sess = None + self.model = None + self.device = "cuda:0" if torch.cuda.is_available() else "cpu" + def version(self): + return torch.__version__ + + def name(self): + return "pytorch-native" + + def image_format(self): + return "NCHW" + + def load(self, model_path, inputs=None, outputs=None): + self.model = 
torch.load(model_path,map_location=lambda storage, loc: storage) + self.model.eval() + # find inputs from the model if not passed in by config + if inputs: + self.inputs = inputs + else: + self.inputs = [] + initializers = set() + for i in self.model.graph.initializer: + initializers.add(i.name) + for i in self.model.graph.input: + if i.name not in initializers: + self.inputs.append(i.name) + # find outputs from the model if not passed in by config + if outputs: + self.outputs = outputs + else: + self.outputs = [] + for i in self.model.graph.output: + self.outputs.append(i.name) + + # prepare the backend + self.model = self.model.to(self.device) + return self + + + def predict(self, feed): + key=[key for key in feed.keys()][0] + feed[key] = torch.tensor(feed[key]).float().to(self.device) + with torch.no_grad(): + output = self.model(feed[key]) + return output diff --git a/retired_benchmarks/vision/classification_and_detection/python/backend_tf.py b/retired_benchmarks/vision/classification_and_detection/python/backend_tf.py new file mode 100755 index 000000000..05245a454 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/backend_tf.py @@ -0,0 +1,63 @@ +""" +tensorflow backend (https://github.com/tensorflow/tensorflow) +""" + +# pylint: disable=unused-argument,missing-docstring,useless-super-delegation + +import tensorflow as tf +from tensorflow import dtypes +from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference + +import os +import backend + + +class BackendTensorflow(backend.Backend): + def __init__(self): + super(BackendTensorflow, self).__init__() + + def version(self): + return tf.__version__ + "/" + tf.__git_version__ + + def name(self): + return "tensorflow" + + def image_format(self): + # By default tensorflow uses NHWC (and the cpu implementation only does NHWC) + return "NHWC" + + def load(self, model_path, inputs=None, outputs=None): + # there is no input/output meta data i the graph so it need to come from config. + if not inputs: + raise ValueError("BackendTensorflow needs inputs") + if not outputs: + raise ValueError("BackendTensorflow needs outputs") + self.outputs = outputs + self.inputs = inputs + + infer_config = tf.compat.v1.ConfigProto() + infer_config.intra_op_parallelism_threads = int(os.environ['TF_INTRA_OP_PARALLELISM_THREADS']) \ + if 'TF_INTRA_OP_PARALLELISM_THREADS' in os.environ else os.cpu_count() + infer_config.inter_op_parallelism_threads = int(os.environ['TF_INTER_OP_PARALLELISM_THREADS']) \ + if 'TF_INTER_OP_PARALLELISM_THREADS' in os.environ else os.cpu_count() + infer_config.use_per_session_threads = 1 + + # TODO: support checkpoint and saved_model formats? 
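+        # The model file is expected to be a frozen GraphDef (.pb). It is parsed from
+        # disk, then optimize_for_inference is attempted, first assuming float32 and
+        # then uint8 input tensors (tensor names are stripped of their ":0" suffixes).
+        # If optimization raises a ValueError, the unoptimized graph is used as-is.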
+ graph_def = tf.compat.v1.GraphDef() + with tf.compat.v1.gfile.FastGFile(model_path, "rb") as f: + graph_def.ParseFromString(f.read()) + for as_datatype_enum in [dtypes.float32.as_datatype_enum, dtypes.uint8.as_datatype_enum]: + try: + optimized_graph_def = optimize_for_inference(graph_def, [item.split(':')[0] for item in inputs], + [item.split(':')[0] for item in outputs], as_datatype_enum, False) + graph_def = optimized_graph_def + break + except ValueError: + pass + + g = tf.compat.v1.import_graph_def(graph_def, name='') + self.sess = tf.compat.v1.Session(graph=g, config=infer_config) + return self + + def predict(self, feed): + return self.sess.run(self.outputs, feed_dict=feed) diff --git a/retired_benchmarks/vision/classification_and_detection/python/backend_tflite.py b/retired_benchmarks/vision/classification_and_detection/python/backend_tflite.py new file mode 100755 index 000000000..7c8c78c13 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/backend_tflite.py @@ -0,0 +1,62 @@ +""" +tflite backend (https://github.com/tensorflow/tensorflow/lite) +""" + +# pylint: disable=unused-argument,missing-docstring,useless-super-delegation + +from threading import Lock + +try: + # try dedicated tflite package first + import tflite_runtime + import tflite_runtime.interpreter as tflite + _version = tflite_runtime.__version__ + _git_version = tflite_runtime.__git_version__ +except: + # fall back to tflite bundled in tensorflow + import tensorflow as tf + from tensorflow.lite.python import interpreter as tflite + _version = tf.__version__ + _git_version = tf.__git_version__ + + +import backend + + +class BackendTflite(backend.Backend): + def __init__(self): + super(BackendTflite, self).__init__() + self.sess = None + self.lock = Lock() + + def version(self): + return _version + "/" + _git_version + + def name(self): + return "tflite" + + def image_format(self): + # tflite is always NHWC + return "NHWC" + + def load(self, model_path, inputs=None, outputs=None): + self.sess = tflite.Interpreter(model_path=model_path) + self.sess.allocate_tensors() + # keep input/output name to index mapping + self.input2index = {i["name"]: i["index"] for i in self.sess.get_input_details()} + self.output2index = {i["name"]: i["index"] for i in self.sess.get_output_details()} + # keep input/output names + self.inputs = list(self.input2index.keys()) + self.outputs = list(self.output2index.keys()) + return self + + def predict(self, feed): + self.lock.acquire() + # set inputs + for k, v in self.input2index.items(): + self.sess.set_tensor(v, feed[k]) + self.sess.invoke() + # get results + res = [self.sess.get_tensor(v) for _, v in self.output2index.items()] + self.lock.release() + return res diff --git a/retired_benchmarks/vision/classification_and_detection/python/coco.py b/retired_benchmarks/vision/classification_and_detection/python/coco.py new file mode 100644 index 000000000..408015874 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/coco.py @@ -0,0 +1,343 @@ +""" +implementation of imagenet dataset +""" + +# pylint: disable=unused-argument,missing-docstring + +import json +import logging +import os +import time + +import cv2 +import numpy as np +from pycocotools.cocoeval import COCOeval +import pycoco +import dataset + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("coco") + + +class Coco(dataset.Dataset): + def __init__(self, data_path, image_list, name, use_cache=0, image_size=None, + image_format="NHWC", pre_process=None, 
count=None, cache_dir=None,use_label_map=False): + super().__init__() + self.image_size = image_size + self.image_list = [] + self.label_list = [] + self.image_ids = [] + self.image_sizes = [] + self.count = count + self.use_cache = use_cache + self.data_path = data_path + self.pre_process = pre_process + self.use_label_map=use_label_map + if not cache_dir: + cache_dir = os.getcwd() + self.cache_dir = os.path.join(cache_dir, "preprocessed", name, image_format) + # input images are in HWC + self.need_transpose = True if image_format == "NCHW" else False + not_found = 0 + empty_80catageories = 0 + if image_list is None: + # by default look for val_map.txt + image_list = os.path.join(data_path, "annotations/instances_val2017.json") + self.annotation_file = image_list + if self.use_label_map: + # for pytorch + label_map = {} + with open(self.annotation_file) as fin: + annotations = json.load(fin) + for cnt, cat in enumerate(annotations["categories"]): + label_map[cat["id"]] = cnt + 1 + + os.makedirs(self.cache_dir, exist_ok=True) + start = time.time() + images = {} + with open(image_list, "r") as f: + coco = json.load(f) + for i in coco["images"]: + images[i["id"]] = {"file_name": i["file_name"], + "height": i["height"], + "width": i["width"], + "bbox": [], + "category": []} + for a in coco["annotations"]: + i = images.get(a["image_id"]) + if i is None: + continue + catagory_ids = label_map[a.get("category_id")] if self.use_label_map else a.get("category_id") + i["category"].append(catagory_ids) + i["bbox"].append(a.get("bbox")) + + for image_id, img in images.items(): + image_name = os.path.join("val2017", img["file_name"]) + src = os.path.join(data_path, image_name) + if not os.path.exists(src): + # if the image does not exists ignore it + not_found += 1 + continue + if len(img["category"])==0 and self.use_label_map: + #if an image doesn't have any of the 81 categories in it + empty_80catageories += 1 #should be 48 images - thus the validation sert has 4952 images + continue + + os.makedirs(os.path.dirname(os.path.join(self.cache_dir, image_name)), exist_ok=True) + dst = os.path.join(self.cache_dir, image_name) + if not os.path.exists(dst + ".npy"): + # cache a preprocessed version of the image + img_org = cv2.imread(src) + processed = self.pre_process(img_org, need_transpose=self.need_transpose, dims=self.image_size) + np.save(dst, processed) + + self.image_ids.append(image_id) + self.image_list.append(image_name) + self.image_sizes.append((img["height"], img["width"])) + self.label_list.append((img["category"], img["bbox"])) + + # limit the dataset if requested + if self.count and len(self.image_list) >= self.count: + break + + time_taken = time.time() - start + if not self.image_list: + log.error("no images in image list found") + raise ValueError("no images in image list found") + if not_found > 0: + log.info("reduced image list, %d images not found", not_found) + if empty_80catageories > 0: + log.info("reduced image list, %d images without any of the 80 categories", empty_80catageories) + + log.info("loaded {} images, cache={}, took={:.1f}sec".format( + len(self.image_list), use_cache, time_taken)) + + self.label_list = np.array(self.label_list) + + def get_item(self, nr): + """Get image by number in the list.""" + dst = os.path.join(self.cache_dir, self.image_list[nr]) + img = np.load(dst + ".npy") + return img, self.label_list[nr] + + def get_item_loc(self, nr): + src = os.path.join(self.data_path, self.image_list[nr]) + return src + + +class PostProcessCoco: + """ + Post processing 
for tensorflow ssd-mobilenet style models + """ + def __init__(self): + self.results = [] + self.good = 0 + self.total = 0 + self.content_ids = [] + self.use_inv_map = False + + def add_results(self, results): + self.results.extend(results) + + def __call__(self, results, ids, expected=None, result_dict=None, ): + # results come as: + # tensorflow, ssd-mobilenet: num_detections,detection_boxes,detection_scores,detection_classes + processed_results = [] + # batch size + bs = len(results[0]) + for idx in range(0, bs): + # keep the content_id from loadgen to handle content_id's without results + self.content_ids.append(ids[idx]) + processed_results.append([]) + detection_num = int(results[0][idx]) + detection_boxes = results[1][idx] + detection_classes = results[3][idx] + expected_classes = expected[idx][0] + for detection in range(0, detection_num): + detection_class = int(detection_classes[detection]) + if detection_class in expected_classes: + self.good += 1 + box = detection_boxes[detection] + processed_results[idx].append([float(ids[idx]), + box[0], box[1], box[2], box[3], + results[2][idx][detection], + float(detection_class)]) + self.total += 1 + return processed_results + + def start(self): + self.results = [] + self.good = 0 + self.total = 0 + + def finalize(self, result_dict, ds=None, output_dir=None): + result_dict["good"] += self.good + result_dict["total"] += self.total + + if self.use_inv_map: + # for pytorch + label_map = {} + with open(ds.annotation_file) as fin: + annotations = json.load(fin) + for cnt, cat in enumerate(annotations["categories"]): + label_map[cat["id"]] = cnt + 1 + inv_map = {v:k for k,v in label_map.items()} + + detections = [] + image_indices = [] + for batch in range(0, len(self.results)): + image_indices.append(self.content_ids[batch]) + for idx in range(0, len(self.results[batch])): + detection = self.results[batch][idx] + # this is the index of the coco image + image_idx = int(detection[0]) + if image_idx != self.content_ids[batch]: + # working with the coco index/id is error prone - extra check to make sure it is consistent + log.error("image_idx missmatch, lg={} / result={}".format(image_idx, self.content_ids[batch])) + # map the index to the coco image id + detection[0] = ds.image_ids[image_idx] + height, width = ds.image_sizes[image_idx] + # box comes from model as: ymin, xmin, ymax, xmax + ymin = detection[1] * height + xmin = detection[2] * width + ymax = detection[3] * height + xmax = detection[4] * width + # pycoco wants {imageID,x1,y1,w,h,score,class} + detection[1] = xmin + detection[2] = ymin + detection[3] = xmax - xmin + detection[4] = ymax - ymin + if self.use_inv_map: + cat_id = inv_map.get(int(detection[6]), -1) + if cat_id == -1: + # FIXME: + log.info("finalize can't map category {}".format(int(detection[6]))) + detection[6] = cat_id + detections.append(np.array(detection)) + + # map indices to coco image id's + image_ids = [ds.image_ids[i] for i in image_indices] + self.results = [] + cocoGt = pycoco.COCO(ds.annotation_file) + cocoDt = cocoGt.loadRes(np.array(detections)) + cocoEval = COCOeval(cocoGt, cocoDt, iouType='bbox') + cocoEval.params.imgIds = image_ids + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + result_dict["mAP"] = cocoEval.stats[0] + + +class PostProcessCocoPt(PostProcessCoco): + """ + Post processing required by ssd-resnet34 / pytorch + """ + def __init__(self,use_inv_map,score_threshold): + super().__init__() + self.use_inv_map = use_inv_map + self.score_threshold = score_threshold + + def 
__call__(self, results, ids, expected=None, result_dict=None): + # results come as: + # detection_boxes,detection_classes,detection_scores + + processed_results = [] + # batch size + bs = len(results[0]) + for idx in range(0, bs): + self.content_ids.append(ids[idx]) + processed_results.append([]) + detection_boxes = results[0][idx] + detection_classes = results[1][idx] + expected_classes = expected[idx][0] + scores = results[2][idx] + #for detection in range(0, len(expected_classes)): + for detection in range(0, len(scores)): + if scores[detection] < self.score_threshold: + break + detection_class = int(detection_classes[detection]) + if detection_class in expected_classes: + self.good += 1 + box = detection_boxes[detection] + # comes from model as: 0=xmax 1=ymax 2=xmin 3=ymin + processed_results[idx].append([float(ids[idx]), + box[1], box[0], box[3], box[2], + scores[detection], + float(detection_class)]) + self.total += 1 + return processed_results + + +class PostProcessCocoOnnx(PostProcessCoco): + """ + Post processing required by ssd-resnet34 / onnx + """ + def __init__(self): + super().__init__() + + def __call__(self, results, ids, expected=None, result_dict=None): + # results come as: + # onnx (from pytorch ssd-resnet34): detection_boxes,detection_classes,detection_scores + + processed_results = [] + + # batch size + bs = len(results[0]) + for idx in range(0, bs): + self.content_ids.append(ids[idx]) + processed_results.append([]) + detection_boxes = results[0][idx] + detection_classes = results[1][idx] + expected_classes = expected[idx][0] + scores = results[2][idx] + for detection in range(0, len(scores)): + if scores[detection] < 0.5: + break + detection_class = int(detection_classes[detection]) + if detection_class in expected_classes: + self.good += 1 + box = detection_boxes[detection] + # comes from model as: 0=xmax 1=ymax 2=xmin 3=ymin + processed_results[idx].append([float(ids[idx]), + box[1], box[0], box[3], box[2], + scores[detection], + float(detection_class)]) + self.total += 1 + return processed_results + +class PostProcessCocoTf(PostProcessCoco): + """ + Post processing required by ssd-resnet34 / pytorch + """ + def __init__(self): + super().__init__() + self.use_inv_map = True + + def __call__(self, results, ids, expected=None, result_dict=None): + # results come as: + # detection_boxes,detection_classes,detection_scores + + processed_results = [] + # batch size + bs = len(results[0]) + for idx in range(0, bs): + self.content_ids.append(ids[idx]) + processed_results.append([]) + detection_boxes = results[0][idx] + detection_classes = results[1][idx] + expected_classes = expected[idx][0] + scores = results[2][idx] + for detection in range(0, len(scores)): + if scores[detection] < 0.05: + break + detection_class = int(detection_classes[detection]) + if detection_class in expected_classes: + self.good += 1 + box = detection_boxes[detection] + # comes from model as: 0=xmax 1=ymax 2=xmin 3=ymin + processed_results[idx].append([float(ids[idx]), + box[0], box[1], box[2], box[3], + scores[detection], + float(detection_class)]) + self.total += 1 + return processed_results diff --git a/retired_benchmarks/vision/classification_and_detection/python/dataset.py b/retired_benchmarks/vision/classification_and_detection/python/dataset.py new file mode 100755 index 000000000..dce968a3d --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/dataset.py @@ -0,0 +1,273 @@ +""" +dataset related classes and methods +""" + +# pylint: 
disable=unused-argument,missing-docstring + +import logging +import sys +import time + +import cv2 +import numpy as np + + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("dataset") + +class Item(): + def __init__(self, label, img, idx): + self.label = label + self.img = img + self.idx = idx + self.start = time.time() + + +def usleep(sec): + if sys.platform == 'win32': + # on windows time.sleep() doesn't work to well + import ctypes + kernel32 = ctypes.windll.kernel32 + timer = kernel32.CreateWaitableTimerA(ctypes.c_void_p(), True, ctypes.c_void_p()) + delay = ctypes.c_longlong(int(-1 * (10 * 1000000 * sec))) + kernel32.SetWaitableTimer(timer, ctypes.byref(delay), 0, ctypes.c_void_p(), ctypes.c_void_p(), False) + kernel32.WaitForSingleObject(timer, 0xffffffff) + else: + time.sleep(sec) + + +class Dataset(): + def __init__(self): + self.arrival = None + self.image_list = [] + self.label_list = [] + self.image_list_inmemory = {} + self.last_loaded = -1 + + def preprocess(self, use_cache=True): + raise NotImplementedError("Dataset:preprocess") + + def get_item_count(self): + return len(self.image_list) + + def get_list(self): + raise NotImplementedError("Dataset:get_list") + + def load_query_samples(self, sample_list): + self.image_list_inmemory = {} + for sample in sample_list: + self.image_list_inmemory[sample], _ = self.get_item(sample) + self.last_loaded = time.time() + + def unload_query_samples(self, sample_list): + if sample_list: + for sample in sample_list: + if sample in self.image_list_inmemory : + del self.image_list_inmemory[sample] + else: + self.image_list_inmemory = {} + + def get_samples(self, id_list): + data = np.array([self.image_list_inmemory[id] for id in id_list]) + return data, self.label_list[id_list] + + def get_item_loc(self, id): + raise NotImplementedError("Dataset:get_item_loc") + + +# +# Post processing +# +class PostProcessCommon: + def __init__(self, offset=0): + self.offset = offset + self.good = 0 + self.total = 0 + + def __call__(self, results, ids, expected=None, result_dict=None): + processed_results = [] + n = len(results[0]) + for idx in range(0, n): + result = results[0][idx] + self.offset + processed_results.append([result]) + if result == expected[idx]: + self.good += 1 + self.total += n + return processed_results + + def add_results(self, results): + pass + + def start(self): + self.good = 0 + self.total = 0 + + def finalize(self, results, ds=False, output_dir=None): + results["good"] = self.good + results["total"] = self.total + + +class PostProcessArgMax: + def __init__(self, offset=0): + self.offset = offset + self.good = 0 + self.total = 0 + + def __call__(self, results, ids, expected=None, result_dict=None): + processed_results = [] + results = np.argmax(results[0], axis=1) + n = results.shape[0] + for idx in range(0, n): + result = results[idx] + self.offset + processed_results.append([result]) + if result == expected[idx]: + self.good += 1 + self.total += n + return processed_results + + def add_results(self, results): + pass + + def start(self): + self.good = 0 + self.total = 0 + + def finalize(self, results, ds=False, output_dir=None): + results["good"] = self.good + results["total"] = self.total + + +# +# pre-processing +# + +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + + +def 
resize_with_aspectratio(img, out_height, out_width, scale=87.5, inter_pol=cv2.INTER_LINEAR): + height, width, _ = img.shape + new_height = int(100. * out_height / scale) + new_width = int(100. * out_width / scale) + if height > width: + w = new_width + h = int(new_height * height / width) + else: + h = new_height + w = int(new_width * width / height) + img = cv2.resize(img, (w, h), interpolation=inter_pol) + return img + + +def pre_process_vgg(img, dims=None, need_transpose=False): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + output_height, output_width, _ = dims + cv2_interpol = cv2.INTER_AREA + img = resize_with_aspectratio(img, output_height, output_width, inter_pol=cv2_interpol) + img = center_crop(img, output_height, output_width) + img = np.asarray(img, dtype='float32') + + # normalize image + means = np.array([123.68, 116.78, 103.94], dtype=np.float32) + img -= means + + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_mobilenet(img, dims=None, need_transpose=False): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + output_height, output_width, _ = dims + img = resize_with_aspectratio(img, output_height, output_width, inter_pol=cv2.INTER_LINEAR) + img = center_crop(img, output_height, output_width) + img = np.asarray(img, dtype='float32') + + img /= 255.0 + img -= 0.5 + img *= 2 + + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_imagenet_pytorch(img, dims=None, need_transpose=False): + from PIL import Image + import torchvision.transforms.functional as F + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = Image.fromarray(img) + img = F.resize(img, 256, Image.BILINEAR) + img = F.center_crop(img, 224) + img = F.to_tensor(img) + img = F.normalize(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], inplace=False) + if not need_transpose: + img = img.permute(1, 2, 0) # NHWC + img = np.asarray(img, dtype='float32') + return img + + +def maybe_resize(img, dims): + img = np.array(img, dtype=np.float32) + if len(img.shape) < 3 or img.shape[2] != 3: + # some images might be grayscale + img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + if dims != None: + im_height, im_width, _ = dims + img = cv2.resize(img, (im_width, im_height), interpolation=cv2.INTER_LINEAR) + return img + + +def pre_process_coco_mobilenet(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + img = np.asarray(img, dtype=np.uint8) + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_coco_pt_mobilenet(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + img -= 127.5 + img /= 127.5 + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_coco_resnet34(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) + std = np.array([0.229, 0.224, 0.225], dtype=np.float32) + + img = img / 255. 
- mean + img = img / std + + if need_transpose: + img = img.transpose([2, 0, 1]) + + return img + + +def pre_process_coco_resnet34_tf(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + mean = np.array([123.68, 116.78, 103.94], dtype=np.float32) + img = img - mean + if need_transpose: + img = img.transpose([2, 0, 1]) + + return img diff --git a/retired_benchmarks/vision/classification_and_detection/python/imagenet.py b/retired_benchmarks/vision/classification_and_detection/python/imagenet.py new file mode 100755 index 000000000..57865c6b7 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/imagenet.py @@ -0,0 +1,95 @@ +""" +implementation of imagenet dataset +""" + +# pylint: disable=unused-argument,missing-docstring + +import logging +import os +import re +import time + +import cv2 +import numpy as np + +import dataset + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("imagenet") + + +class Imagenet(dataset.Dataset): + + def __init__(self, data_path, image_list, name, use_cache=0, image_size=None, + image_format="NHWC", pre_process=None, count=None, cache_dir=None): + super(Imagenet, self).__init__() + if image_size is None: + self.image_size = [224, 224, 3] + else: + self.image_size = image_size + if not cache_dir: + cache_dir = os.getcwd() + self.image_list = [] + self.label_list = [] + self.count = count + self.use_cache = use_cache + self.cache_dir = os.path.join(cache_dir, "preprocessed", name, image_format) + self.data_path = data_path + self.pre_process = pre_process + # input images are in HWC + self.need_transpose = True if image_format == "NCHW" else False + + not_found = 0 + if image_list is None: + # by default look for val_map.txt + image_list = os.path.join(data_path, "val_map.txt") + + os.makedirs(self.cache_dir, exist_ok=True) + + start = time.time() + with open(image_list, 'r') as f: + for s in f: + image_name, label = re.split(r"\s+", s.strip()) + src = os.path.join(data_path, image_name) + if not os.path.exists(src): + # if the image does not exist, ignore it + not_found += 1 + continue + os.makedirs(os.path.dirname(os.path.join(self.cache_dir, image_name)), exist_ok=True) + dst = os.path.join(self.cache_dir, image_name) + if not os.path.exists(dst + ".npy"): + # cache a preprocessed version of the image + # TODO: make this multi-threaded? 
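The TODO above asks about making the one-time preprocessing cache multi-threaded. A minimal sketch of one way to do that with the standard-library ThreadPoolExecutor follows; cache_one, cache_all, the jobs list and the worker count are illustrative names and choices, not taken from the benchmark code.

import concurrent.futures

import cv2
import numpy as np


def cache_one(src, dst, pre_process, need_transpose, dims):
    # read one image, preprocess it and store the result as <dst>.npy
    img_org = cv2.imread(src)
    processed = pre_process(img_org, need_transpose=need_transpose, dims=dims)
    np.save(dst, processed)


def cache_all(jobs, pre_process, need_transpose, dims, workers=8):
    # jobs would be the (src, dst) pairs collected by the loop above
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
        futures = [pool.submit(cache_one, src, dst, pre_process, need_transpose, dims)
                   for src, dst in jobs]
        for f in futures:
            f.result()  # re-raise any exception from a worker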
+ img_org = cv2.imread(src) + processed = self.pre_process(img_org, need_transpose=self.need_transpose, dims=self.image_size) + np.save(dst, processed) + + self.image_list.append(image_name) + self.label_list.append(int(label)) + + # limit the dataset if requested + if self.count and len(self.image_list) >= self.count: + break + + time_taken = time.time() - start + if not self.image_list: + log.error("no images in image list found") + raise ValueError("no images in image list found") + if not_found > 0: + log.info("reduced image list, %d images not found", not_found) + + log.info("loaded {} images, cache={}, took={:.1f}sec".format( + len(self.image_list), use_cache, time_taken)) + + self.label_list = np.array(self.label_list) + + def get_item(self, nr): + """Get image by number in the list.""" + dst = os.path.join(self.cache_dir, self.image_list[nr]) + img = np.load(dst + ".npy") + return img, self.label_list[nr] + + def get_item_loc(self, nr): + src = os.path.join(self.data_path, self.image_list[nr]) + return src + diff --git a/retired_benchmarks/vision/classification_and_detection/python/main.py b/retired_benchmarks/vision/classification_and_detection/python/main.py new file mode 100755 index 000000000..904a8b74a --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/main.py @@ -0,0 +1,565 @@ +""" +mlperf inference benchmarking tool +""" + +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import array +import collections +import json +import logging +import os +import sys +import threading +import time +from queue import Queue + +import mlperf_loadgen as lg +import numpy as np + +import dataset +import imagenet +import coco + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("main") + +NANO_SEC = 1e9 +MILLI_SEC = 1000 + +# pylint: disable=missing-docstring + +# the datasets we support +SUPPORTED_DATASETS = { + "imagenet": + (imagenet.Imagenet, dataset.pre_process_vgg, dataset.PostProcessCommon(offset=-1), + {"image_size": [224, 224, 3]}), + "imagenet_mobilenet": + (imagenet.Imagenet, dataset.pre_process_mobilenet, dataset.PostProcessArgMax(offset=-1), + {"image_size": [224, 224, 3]}), + "imagenet_pytorch": + (imagenet.Imagenet, dataset.pre_process_imagenet_pytorch, dataset.PostProcessArgMax(offset=0), + {"image_size": [224, 224, 3]}), + "coco-300": + (coco.Coco, dataset.pre_process_coco_mobilenet, coco.PostProcessCoco(), + {"image_size": [300, 300, 3]}), + "coco-300-pt": + (coco.Coco, dataset.pre_process_coco_pt_mobilenet, coco.PostProcessCocoPt(False,0.3), + {"image_size": [300, 300, 3]}), + "coco-1200": + (coco.Coco, dataset.pre_process_coco_resnet34, coco.PostProcessCoco(), + {"image_size": [1200, 1200, 3]}), + "coco-1200-onnx": + (coco.Coco, dataset.pre_process_coco_resnet34, coco.PostProcessCocoOnnx(), + {"image_size": [1200, 1200, 3]}), + "coco-1200-pt": + (coco.Coco, dataset.pre_process_coco_resnet34, coco.PostProcessCocoPt(True,0.05), + {"image_size": [1200, 1200, 3],"use_label_map": True}), + "coco-1200-tf": + (coco.Coco, dataset.pre_process_coco_resnet34, coco.PostProcessCocoTf(), + {"image_size": [1200, 1200, 3],"use_label_map": False}), +} + +# pre-defined command line options so simplify things. 
They are used as defaults and can be +# overwritten from command line + +SUPPORTED_PROFILES = { + "defaults": { + "dataset": "imagenet", + "backend": "tensorflow", + "cache": 0, + "max-batchsize": 32, + }, + + # resnet + "resnet50-tf": { + "inputs": "input_tensor:0", + "outputs": "ArgMax:0", + "dataset": "imagenet", + "backend": "tensorflow", + "model-name": "resnet50", + }, + "resnet50-pytorch": { + "inputs": "image", + "outputs": "ArgMax:0", + "dataset": "imagenet", + "backend": "tensorflow", + "model-name": "resnet50", + }, + "resnet50-onnxruntime": { + "dataset": "imagenet", + "outputs": "ArgMax:0", + "backend": "onnxruntime", + "model-name": "resnet50", + }, + + # mobilenet + "mobilenet-tf": { + "inputs": "input:0", + "outputs": "MobilenetV1/Predictions/Reshape_1:0", + "dataset": "imagenet_mobilenet", + "backend": "tensorflow", + "model-name": "mobilenet", + }, + "mobilenet-onnxruntime": { + "dataset": "imagenet_mobilenet", + "outputs": "MobilenetV1/Predictions/Reshape_1:0", + "backend": "onnxruntime", + "model-name": "mobilenet", + }, + + # ssd-mobilenet + "ssd-mobilenet-tf": { + "inputs": "image_tensor:0", + "outputs": "num_detections:0,detection_boxes:0,detection_scores:0,detection_classes:0", + "dataset": "coco-300", + "backend": "tensorflow", + "model-name": "ssd-mobilenet", + }, + "ssd-mobilenet-pytorch": { + "inputs": "image", + "outputs": "bboxes,labels,scores", + "dataset": "coco-300-pt", + "backend": "pytorch-native", + "model-name": "ssd-mobilenet", + }, + "ssd-mobilenet-onnxruntime": { + "dataset": "coco-300", + "outputs": "num_detections:0,detection_boxes:0,detection_scores:0,detection_classes:0", + "backend": "onnxruntime", + "data-format": "NHWC", + "model-name": "ssd-mobilenet", + }, + + # ssd-resnet34 + "ssd-resnet34-tf": { + "inputs": "image:0", + "outputs": "detection_bboxes:0,detection_classes:0,detection_scores:0", + "dataset": "coco-1200-tf", + "backend": "tensorflow", + "data-format": "NCHW", + "model-name": "ssd-resnet34", + }, + "ssd-resnet34-pytorch": { + "inputs": "image", + "outputs": "bboxes,labels,scores", + "dataset": "coco-1200-pt", + "backend": "pytorch-native", + "model-name": "ssd-resnet34", + }, + "ssd-resnet34-onnxruntime": { + "dataset": "coco-1200-onnx", + "inputs": "image", + "outputs": "bboxes,labels,scores", + "backend": "onnxruntime", + "data-format": "NCHW", + "max-batchsize": 1, + "model-name": "ssd-resnet34", + }, + "ssd-resnet34-onnxruntime-tf": { + "dataset": "coco-1200-tf", + "inputs": "image:0", + "outputs": "detection_bboxes:0,detection_classes:0,detection_scores:0", + "backend": "onnxruntime", + "data-format": "NHWC", + "model-name": "ssd-resnet34", + }, +} + +SCENARIO_MAP = { + "SingleStream": lg.TestScenario.SingleStream, + "MultiStream": lg.TestScenario.MultiStream, + "Server": lg.TestScenario.Server, + "Offline": lg.TestScenario.Offline, +} + +last_timeing = [] + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--dataset", choices=SUPPORTED_DATASETS.keys(), help="dataset") + parser.add_argument("--dataset-path", required=True, help="path to the dataset") + parser.add_argument("--dataset-list", help="path to the dataset list") + parser.add_argument("--data-format", choices=["NCHW", "NHWC"], help="data format") + parser.add_argument("--profile", choices=SUPPORTED_PROFILES.keys(), help="standard profiles") + parser.add_argument("--scenario", default="SingleStream", + help="mlperf benchmark scenario, one of " + str(list(SCENARIO_MAP.keys()))) + 
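Each entry in the SUPPORTED_DATASETS table above bundles a dataset class, a preprocessing function, a post-processor instance and extra constructor kwargs; main() later unpacks that tuple to build the dataset. A short sketch of the lookup, assuming it runs inside main.py where the table is in scope and using a placeholder dataset path:

# "imagenet" entry; the path is a placeholder, not a real location
wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS["imagenet"]
ds = wanted_dataset(data_path="/data/imagenet",
                    image_list=None,            # falls back to val_map.txt
                    name="imagenet",
                    image_format="NHWC",
                    pre_process=pre_proc,
                    use_cache=0,
                    count=None, **kwargs)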
parser.add_argument("--max-batchsize", type=int, help="max batch size in a single inference") + parser.add_argument("--model", required=True, help="model file") + parser.add_argument("--output", default="output", help="test results") + parser.add_argument("--inputs", help="model inputs") + parser.add_argument("--outputs", help="model outputs") + parser.add_argument("--backend", help="runtime to use") + parser.add_argument("--model-name", help="name of the mlperf model, ie. resnet50") + parser.add_argument("--threads", default=os.cpu_count(), type=int, help="threads") + parser.add_argument("--qps", type=int, help="target qps") + parser.add_argument("--cache", type=int, default=0, help="use cache") + parser.add_argument("--accuracy", action="store_true", help="enable accuracy pass") + parser.add_argument("--find-peak-performance", action="store_true", help="enable finding peak performance pass") + parser.add_argument("--debug", action="store_true", help="debug, turn traces on") + + # file to use mlperf rules compliant parameters + parser.add_argument("--mlperf_conf", default="../../mlperf.conf", help="mlperf rules config") + # file for user LoadGen settings such as target QPS + parser.add_argument("--user_conf", default="user.conf", help="user config for user LoadGen settings such as target QPS") + + # below will override mlperf rules compliant settings - don't use for official submission + parser.add_argument("--time", type=int, help="time to scan in seconds") + parser.add_argument("--count", type=int, help="dataset items to use") + parser.add_argument("--max-latency", type=float, help="mlperf max latency in pct tile") + parser.add_argument("--samples-per-query", type=int, help="mlperf multi-stream sample per query") + args = parser.parse_args() + + # don't use defaults in argparser. 
Instead we default to a dict, override that with a profile + # and take this as default unless command line give + defaults = SUPPORTED_PROFILES["defaults"] + + if args.profile: + profile = SUPPORTED_PROFILES[args.profile] + defaults.update(profile) + for k, v in defaults.items(): + kc = k.replace("-", "_") + if getattr(args, kc) is None: + setattr(args, kc, v) + if args.inputs: + args.inputs = args.inputs.split(",") + if args.outputs: + args.outputs = args.outputs.split(",") + + if args.scenario not in SCENARIO_MAP: + parser.error("valid scanarios:" + str(list(SCENARIO_MAP.keys()))) + return args + + +def get_backend(backend): + if backend == "tensorflow": + from backend_tf import BackendTensorflow + backend = BackendTensorflow() + elif backend == "onnxruntime": + from backend_onnxruntime import BackendOnnxruntime + backend = BackendOnnxruntime() + elif backend == "null": + from backend_null import BackendNull + backend = BackendNull() + elif backend == "pytorch": + from backend_pytorch import BackendPytorch + backend = BackendPytorch() + elif backend == "pytorch-native": + from backend_pytorch_native import BackendPytorchNative + backend = BackendPytorchNative() + elif backend == "tflite": + from backend_tflite import BackendTflite + backend = BackendTflite() + else: + raise ValueError("unknown backend: " + backend) + return backend + + +class Item: + """An item that we queue for processing by the thread pool.""" + + def __init__(self, query_id, content_id, img, label=None): + self.query_id = query_id + self.content_id = content_id + self.img = img + self.label = label + self.start = time.time() + + +class RunnerBase: + def __init__(self, model, ds, threads, post_proc=None, max_batchsize=128): + self.take_accuracy = False + self.ds = ds + self.model = model + self.post_process = post_proc + self.threads = threads + self.take_accuracy = False + self.max_batchsize = max_batchsize + self.result_timing = [] + + def handle_tasks(self, tasks_queue): + pass + + def start_run(self, result_dict, take_accuracy): + self.result_dict = result_dict + self.result_timing = [] + self.take_accuracy = take_accuracy + self.post_process.start() + + def run_one_item(self, qitem): + # run the prediction + processed_results = [] + try: + results = self.model.predict({self.model.inputs[0]: qitem.img}) + processed_results = self.post_process(results, qitem.content_id, qitem.label, self.result_dict) + if self.take_accuracy: + self.post_process.add_results(processed_results) + self.result_timing.append(time.time() - qitem.start) + except Exception as ex: # pylint: disable=broad-except + src = [self.ds.get_item_loc(i) for i in qitem.content_id] + log.error("thread: failed on contentid=%s, %s", src, ex) + # since post_process will not run, fake empty responses + processed_results = [[]] * len(qitem.query_id) + finally: + response_array_refs = [] + response = [] + for idx, query_id in enumerate(qitem.query_id): + response_array = array.array("B", np.array(processed_results[idx], np.float32).tobytes()) + response_array_refs.append(response_array) + bi = response_array.buffer_info() + response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1])) + lg.QuerySamplesComplete(response) + + def enqueue(self, query_samples): + idx = [q.index for q in query_samples] + query_id = [q.id for q in query_samples] + if len(query_samples) < self.max_batchsize: + data, label = self.ds.get_samples(idx) + self.run_one_item(Item(query_id, idx, data, label)) + else: + bs = self.max_batchsize + for i in range(0, len(idx), bs): + data, 
label = self.ds.get_samples(idx[i:i+bs]) + self.run_one_item(Item(query_id[i:i+bs], idx[i:i+bs], data, label)) + + def finish(self): + pass + + +class QueueRunner(RunnerBase): + def __init__(self, model, ds, threads, post_proc=None, max_batchsize=128): + super().__init__(model, ds, threads, post_proc, max_batchsize) + self.tasks = Queue(maxsize=threads * 4) + self.workers = [] + self.result_dict = {} + + for _ in range(self.threads): + worker = threading.Thread(target=self.handle_tasks, args=(self.tasks,)) + worker.daemon = True + self.workers.append(worker) + worker.start() + + def handle_tasks(self, tasks_queue): + """Worker thread.""" + while True: + qitem = tasks_queue.get() + if qitem is None: + # None in the queue indicates the parent want us to exit + tasks_queue.task_done() + break + self.run_one_item(qitem) + tasks_queue.task_done() + + def enqueue(self, query_samples): + idx = [q.index for q in query_samples] + query_id = [q.id for q in query_samples] + if len(query_samples) < self.max_batchsize: + data, label = self.ds.get_samples(idx) + self.tasks.put(Item(query_id, idx, data, label)) + else: + bs = self.max_batchsize + for i in range(0, len(idx), bs): + ie = i + bs + data, label = self.ds.get_samples(idx[i:ie]) + self.tasks.put(Item(query_id[i:ie], idx[i:ie], data, label)) + + def finish(self): + # exit all threads + for _ in self.workers: + self.tasks.put(None) + for worker in self.workers: + worker.join() + + +def add_results(final_results, name, result_dict, result_list, took, show_accuracy=False): + percentiles = [50., 80., 90., 95., 99., 99.9] + buckets = np.percentile(result_list, percentiles).tolist() + buckets_str = ",".join(["{}:{:.4f}".format(p, b) for p, b in zip(percentiles, buckets)]) + + if result_dict["total"] == 0: + result_dict["total"] = len(result_list) + + # this is what we record for each run + result = { + "took": took, + "mean": np.mean(result_list), + "percentiles": {str(k): v for k, v in zip(percentiles, buckets)}, + "qps": len(result_list) / took, + "count": len(result_list), + "good_items": result_dict["good"], + "total_items": result_dict["total"], + } + acc_str = "" + if show_accuracy: + result["accuracy"] = 100. * result_dict["good"] / result_dict["total"] + acc_str = ", acc={:.3f}%".format(result["accuracy"]) + if "mAP" in result_dict: + result["mAP"] = 100. * result_dict["mAP"] + acc_str += ", mAP={:.3f}%".format(result["mAP"]) + + # add the result to the result dict + final_results[name] = result + + # to stdout + print("{} qps={:.2f}, mean={:.4f}, time={:.3f}{}, queries={}, tiles={}".format( + name, result["qps"], result["mean"], took, acc_str, + len(result_list), buckets_str)) + + +def main(): + global last_timeing + args = get_args() + + log.info(args) + + # find backend + backend = get_backend(args.backend) + + # override image format if given + image_format = args.data_format if args.data_format else backend.image_format() + + # --count applies to accuracy mode only and can be used to limit the number of images + # for testing. For perf model we always limit count to 200. 
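QueueRunner above fans queries out to a fixed pool of worker threads and later shuts them down by pushing one None sentinel per worker. A stripped-down sketch of the same queue-and-sentinel pattern, independent of LoadGen and with a toy work item; the thread count and queue size are arbitrary.

import threading
from queue import Queue


def handle_tasks(tasks):
    while True:
        item = tasks.get()
        if item is None:        # sentinel: parent wants this worker to exit
            tasks.task_done()
            break
        # ... run the real work item here ...
        tasks.task_done()


tasks = Queue(maxsize=16)
workers = [threading.Thread(target=handle_tasks, args=(tasks,), daemon=True) for _ in range(4)]
for w in workers:
    w.start()
for item in range(10):          # toy work items
    tasks.put(item)
for _ in workers:
    tasks.put(None)             # one sentinel per worker
for w in workers:
    w.join()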
+ count_override = False + count = args.count + if count: + count_override = True + + # dataset to use + wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset] + ds = wanted_dataset(data_path=args.dataset_path, + image_list=args.dataset_list, + name=args.dataset, + image_format=image_format, + pre_process=pre_proc, + use_cache=args.cache, + count=count, **kwargs) + # load model to backend + model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs) + final_results = { + "runtime": model.name(), + "version": model.version(), + "time": int(time.time()), + "cmdline": str(args), + } + + mlperf_conf = os.path.abspath(args.mlperf_conf) + if not os.path.exists(mlperf_conf): + log.error("{} not found".format(mlperf_conf)) + sys.exit(1) + + user_conf = os.path.abspath(args.user_conf) + if not os.path.exists(user_conf): + log.error("{} not found".format(user_conf)) + sys.exit(1) + + if args.output: + output_dir = os.path.abspath(args.output) + os.makedirs(output_dir, exist_ok=True) + os.chdir(output_dir) + + # + # make one pass over the dataset to validate accuracy + # + count = ds.get_item_count() + + # warmup + ds.load_query_samples([0]) + for _ in range(5): + img, _ = ds.get_samples([0]) + _ = backend.predict({backend.inputs[0]: img}) + ds.unload_query_samples(None) + + scenario = SCENARIO_MAP[args.scenario] + runner_map = { + lg.TestScenario.SingleStream: RunnerBase, + lg.TestScenario.MultiStream: QueueRunner, + lg.TestScenario.Server: QueueRunner, + lg.TestScenario.Offline: QueueRunner + } + runner = runner_map[scenario](model, ds, args.threads, post_proc=post_proc, max_batchsize=args.max_batchsize) + + def issue_queries(query_samples): + runner.enqueue(query_samples) + + def flush_queries(): + pass + + def process_latencies(latencies_ns): + # called by loadgen to show us the recorded latencies + global last_timeing + last_timeing = [t / NANO_SEC for t in latencies_ns] + + log_output_settings = lg.LogOutputSettings() + log_output_settings.outdir = output_dir + log_output_settings.copy_summary_to_stdout = False + log_settings = lg.LogSettings() + log_settings.enable_trace = args.debug + log_settings.log_output = log_output_settings + + settings = lg.TestSettings() + settings.FromConfig(mlperf_conf, args.model_name, args.scenario) + settings.FromConfig(user_conf, args.model_name, args.scenario) + settings.scenario = scenario + settings.mode = lg.TestMode.PerformanceOnly + if args.accuracy: + settings.mode = lg.TestMode.AccuracyOnly + if args.find_peak_performance: + settings.mode = lg.TestMode.FindPeakPerformance + + if args.time: + # override the time we want to run + settings.min_duration_ms = args.time * MILLI_SEC + settings.max_duration_ms = args.time * MILLI_SEC + + if args.qps: + qps = float(args.qps) + settings.server_target_qps = qps + settings.offline_expected_qps = qps + + if count_override: + settings.min_query_count = count + settings.max_query_count = count + + if args.samples_per_query: + settings.multi_stream_samples_per_query = args.samples_per_query + if args.max_latency: + settings.server_target_latency_ns = int(args.max_latency * NANO_SEC) + settings.multi_stream_target_latency_ns = int(args.max_latency * NANO_SEC) + + sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies) + qsl = lg.ConstructQSL(count, min(count, 500), ds.load_query_samples, ds.unload_query_samples) + + log.info("starting {}".format(scenario)) + result_dict = {"good": 0, "total": 0, "scenario": str(scenario)} + runner.start_run(result_dict, args.accuracy) 
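Just below, LoadGen takes over: it calls issue_queries with batches of query samples, flush_queries at the end and process_latencies with the measured latencies, while the QSL callbacks load and unload samples. A minimal smoke-test sketch of that contract, reusing only the loadgen calls that already appear in this file and answering every query with an empty response; the sample counts and scenario are arbitrary choices.

import os
import mlperf_loadgen as lg


def issue_queries(query_samples):
    # complete every query right away with an empty (zero-length) response
    lg.QuerySamplesComplete([lg.QuerySampleResponse(q.id, 0, 0) for q in query_samples])

def flush_queries():
    pass

def process_latencies(latencies_ns):
    print("got {} latencies".format(len(latencies_ns)))

def load_samples(sample_ids):
    pass

def unload_samples(sample_ids):
    pass

settings = lg.TestSettings()
settings.scenario = lg.TestScenario.Offline
settings.mode = lg.TestMode.PerformanceOnly

log_output_settings = lg.LogOutputSettings()
log_output_settings.outdir = os.getcwd()
log_output_settings.copy_summary_to_stdout = True
log_settings = lg.LogSettings()
log_settings.log_output = log_output_settings

sut = lg.ConstructSUT(issue_queries, flush_queries, process_latencies)
qsl = lg.ConstructQSL(1024, 500, load_samples, unload_samples)
lg.StartTestWithLogSettings(sut, qsl, settings, log_settings)
lg.DestroyQSL(qsl)
lg.DestroySUT(sut)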
+ + lg.StartTestWithLogSettings(sut, qsl, settings, log_settings) + + if not last_timeing: + last_timeing = runner.result_timing + if args.accuracy: + post_proc.finalize(result_dict, ds, output_dir=args.output) + + add_results(final_results, "{}".format(scenario), + result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy) + + runner.finish() + lg.DestroyQSL(qsl) + lg.DestroySUT(sut) + + # + # write final results + # + if args.output: + with open("results.json", "w") as f: + json.dump(final_results, f, sort_keys=True, indent=4) + + +if __name__ == "__main__": + main() diff --git a/retired_benchmarks/vision/classification_and_detection/python/models/anchor_generator.py b/retired_benchmarks/vision/classification_and_detection/python/models/anchor_generator.py new file mode 100644 index 000000000..9a2d9d490 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/models/anchor_generator.py @@ -0,0 +1,394 @@ +import torch +import numpy as np + + +# The following functions were taken from +# https://github.com/tensorflow/models/tree/master/research/object_detection +# with minor modifications so that they use +# torch operations instead + +def expanded_shape(orig_shape, start_dim, num_dims): + s = (1,) * num_dims + return orig_shape[:start_dim] + s + orig_shape[start_dim:] + + +def meshgrid(x, y): + """Tiles the contents of x and y into a pair of grids. + Multidimensional analog of numpy.meshgrid, giving the same behavior if x and y + are vectors. Generally, this will give: + xgrid(i1, ..., i_m, j_1, ..., j_n) = x(j_1, ..., j_n) + ygrid(i1, ..., i_m, j_1, ..., j_n) = y(i_1, ..., i_m) + Keep in mind that the order of the arguments and outputs is reverse relative + to the order of the indices they go into, done for compatibility with numpy. + The output tensors have the same shapes. Specifically: + xgrid.get_shape() = y.get_shape().concatenate(x.get_shape()) + ygrid.get_shape() = y.get_shape().concatenate(x.get_shape()) + Args: + x: A tensor of arbitrary shape and rank. xgrid will contain these values + varying in its last dimensions. + y: A tensor of arbitrary shape and rank. ygrid will contain these values + varying in its first dimensions. + Returns: + A tuple of tensors (xgrid, ygrid). + """ + x = torch.as_tensor(x) + y = torch.as_tensor(y) + x_exp_shape = expanded_shape(x.shape, 0, y.dim()) + y_exp_shape = expanded_shape(y.shape, y.dim(), x.dim()) + + xgrid = torch.reshape(x, x_exp_shape).repeat(*y_exp_shape) + ygrid = torch.reshape(y, y_exp_shape).repeat(*x_exp_shape) + new_shape = y.shape + x.shape + xgrid = xgrid.reshape(new_shape) + ygrid = ygrid.reshape(new_shape) + + return xgrid, ygrid + + +def tile_anchors(grid_height, + grid_width, + scales, + aspect_ratios, + base_anchor_size, + anchor_stride, + anchor_offset): + """Create a tiled set of anchors strided along a grid in image space. + This op creates a set of anchor boxes by placing a "basis" collection of + boxes with user-specified scales and aspect ratios centered at evenly + distributed points along a grid. The basis collection is specified via the + scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2] + and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale + .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2 + and aspect ratio 1/2. Each box is multiplied by "base_anchor_size" before + placing it over its respective center. 
+ Grid points are specified via grid_height, grid_width parameters as well as + the anchor_stride and anchor_offset parameters. + Args: + grid_height: size of the grid in the y direction (int or int scalar tensor) + grid_width: size of the grid in the x direction (int or int scalar tensor) + scales: a 1-d (float) tensor representing the scale of each box in the + basis set. + aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each + box in the basis set. The length of the scales and aspect_ratios tensors + must be equal. + base_anchor_size: base anchor size as [height, width] + (float tensor of shape [2]) + anchor_stride: difference in centers between base anchors for adjacent grid + positions (float tensor of shape [2]) + anchor_offset: center of the anchor with scale and aspect ratio 1 for the + upper left element of the grid, this should be zero for + feature networks with only VALID padding and even receptive + field size, but may need some additional calculation if other + padding is used (float tensor of shape [2]) + Returns: + a BoxList holding a collection of N anchor boxes + """ + aspect_ratios = torch.as_tensor(aspect_ratios, dtype=torch.float32) + scales = torch.as_tensor(scales, dtype=torch.float32) + + ratio_sqrts = torch.sqrt(aspect_ratios) + heights = scales / ratio_sqrts * base_anchor_size[0] + widths = scales * ratio_sqrts * base_anchor_size[1] + + # Get a grid of box centers + y_centers = torch.arange(grid_height, dtype=torch.float32) + y_centers = y_centers * anchor_stride[0] + anchor_offset[0] + x_centers = torch.arange(grid_width, dtype=torch.float32) + x_centers = x_centers * anchor_stride[1] + anchor_offset[1] + + x_centers, y_centers = meshgrid(x_centers, y_centers) + + widths_grid, x_centers_grid = meshgrid(widths, x_centers) + heights_grid, y_centers_grid = meshgrid(heights, y_centers) + + bbox_centers = torch.stack([y_centers_grid, x_centers_grid], dim=3) + bbox_sizes = torch.stack([heights_grid, widths_grid], dim=3) + bbox_centers = torch.reshape(bbox_centers, [-1, 2]) + bbox_sizes = torch.reshape(bbox_sizes, [-1, 2]) + bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes) + return bbox_corners + + +def _center_size_bbox_to_corners_bbox(centers, sizes): + """Converts bbox center-size representation to corners representation. + Args: + centers: a tensor with shape [N, 2] representing bounding box centers + sizes: a tensor with shape [N, 2] representing bounding boxes + Returns: + corners: tensor with shape [N, 4] representing bounding boxes in corners + representation + """ + return torch.cat([centers - .5 * sizes, centers + .5 * sizes], 1) + + +def create_ssd_anchors(num_layers=6, + min_scale=0.2, + max_scale=0.95, + scales=None, + aspect_ratios=(1.0, 2.0, 1.0 / 2, 3.0, 1.0 / 3), + interpolated_scale_aspect_ratio=1.0, + base_anchor_size=None, + anchor_strides=None, + anchor_offsets=None, + reduce_boxes_in_lowest_layer=True): + """Creates MultipleGridAnchorGenerator for SSD anchors. + This function instantiates a MultipleGridAnchorGenerator that reproduces + ``default box`` construction proposed by Liu et al in the SSD paper. + See Section 2.2 for details. Grid sizes are assumed to be passed in + at generation time from finest resolution to coarsest resolution --- this is + used to (linearly) interpolate scales of anchor boxes corresponding to the + intermediate grid sizes. 
+ Anchors that are returned by calling the `generate` method on the returned + MultipleGridAnchorGenerator object are always in normalized coordinates + and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]). + Args: + num_layers: integer number of grid layers to create anchors for (actual + grid sizes passed in at generation time) + min_scale: scale of anchors corresponding to finest resolution (float) + max_scale: scale of anchors corresponding to coarsest resolution (float) + scales: As list of anchor scales to use. When not None and not empty, + min_scale and max_scale are not used. + aspect_ratios: list or tuple of (float) aspect ratios to place on each + grid point. + interpolated_scale_aspect_ratio: An additional anchor is added with this + aspect ratio and a scale interpolated between the scale for a layer + and the scale for the next layer (1.0 for the last layer). + This anchor is not included if this value is 0. + base_anchor_size: base anchor size as [height, width]. + The height and width values are normalized to the minimum dimension of the + input height and width, so that when the base anchor height equals the + base anchor width, the resulting anchor is square even if the input image + is not square. + anchor_strides: list of pairs of strides in pixels (in y and x directions + respectively). For example, setting anchor_strides=[(25, 25), (50, 50)] + means that we want the anchors corresponding to the first layer to be + strided by 25 pixels and those in the second layer to be strided by 50 + pixels in both y and x directions. If anchor_strides=None, they are set to + be the reciprocal of the corresponding feature map shapes. + anchor_offsets: list of pairs of offsets in pixels (in y and x directions + respectively). The offset specifies where we want the center of the + (0, 0)-th anchor to lie for each layer. For example, setting + anchor_offsets=[(10, 10), (20, 20)]) means that we want the + (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space + and likewise that we want the (0, 0)-th anchor of the second layer to lie + at (25, 25) in pixel space. If anchor_offsets=None, then they are set to + be half of the corresponding anchor stride. + reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3 + boxes per location is used in the lowest layer. + Returns: + a MultipleGridAnchorGenerator + """ + if base_anchor_size is None: + base_anchor_size = [1.0, 1.0] + base_anchor_size = torch.tensor(base_anchor_size, dtype=torch.float32) + box_specs_list = [] + if scales is None or not scales: + scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1) + for i in range(num_layers)] + [1.0] + else: + # Add 1.0 to the end, which will only be used in scale_next below and used + # for computing an interpolated scale for the largest scale in the list. + scales += [1.0] + + for layer, scale, scale_next in zip( + range(num_layers), scales[:-1], scales[1:]): + layer_box_specs = [] + if layer == 0 and reduce_boxes_in_lowest_layer: + layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)] + else: + for aspect_ratio in aspect_ratios: + layer_box_specs.append((scale, aspect_ratio)) + # Add one more anchor, with a scale between the current scale, and the + # scale for the next layer, with a specified aspect ratio (1.0 by + # default). 
+ if interpolated_scale_aspect_ratio > 0.0: + layer_box_specs.append((np.sqrt(scale*scale_next), + interpolated_scale_aspect_ratio)) + box_specs_list.append(layer_box_specs) + + return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size, + anchor_strides, anchor_offsets) + +class MultipleGridAnchorGenerator(object): + """Generate a grid of anchors for multiple CNN layers.""" + + def __init__(self, + box_specs_list, + base_anchor_size=None, + anchor_strides=None, + anchor_offsets=None, + clip_window=None): + """Constructs a MultipleGridAnchorGenerator. + To construct anchors, at multiple grid resolutions, one must provide a + list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid + size, a corresponding list of (scale, aspect ratio) box specifications. + For example: + box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid + [(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid + To support the fully convolutional setting, we pass grid sizes in at + generation time, while scale and aspect ratios are fixed at construction + time. + Args: + box_specs_list: list of list of (scale, aspect ratio) pairs with the + outside list having the same number of entries as feature_map_shape_list + (which is passed in at generation time). + base_anchor_size: base anchor size as [height, width] + (length-2 float tensor, default=[1.0, 1.0]). + The height and width values are normalized to the + minimum dimension of the input height and width, so that + when the base anchor height equals the base anchor + width, the resulting anchor is square even if the input + image is not square. + anchor_strides: list of pairs of strides in pixels (in y and x directions + respectively). For example, setting anchor_strides=[(25, 25), (50, 50)] + means that we want the anchors corresponding to the first layer to be + strided by 25 pixels and those in the second layer to be strided by 50 + pixels in both y and x directions. If anchor_strides=None, they are set + to be the reciprocal of the corresponding feature map shapes. + anchor_offsets: list of pairs of offsets in pixels (in y and x directions + respectively). The offset specifies where we want the center of the + (0, 0)-th anchor to lie for each layer. For example, setting + anchor_offsets=[(10, 10), (20, 20)]) means that we want the + (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space + and likewise that we want the (0, 0)-th anchor of the second layer to + lie at (25, 25) in pixel space. If anchor_offsets=None, then they are + set to be half of the corresponding anchor stride. + clip_window: a tensor of shape [4] specifying a window to which all + anchors should be clipped. If clip_window is None, then no clipping + is performed. 
+ Raises: + ValueError: if box_specs_list is not a list of list of pairs + ValueError: if clip_window is not either None or a tensor of shape [4] + """ + if isinstance(box_specs_list, list) and all( + [isinstance(list_item, list) for list_item in box_specs_list]): + self._box_specs = box_specs_list + else: + raise ValueError('box_specs_list is expected to be a ' + 'list of lists of pairs') + if base_anchor_size is None: + base_anchor_size = torch.tensor([256, 256], dtype=torch.float32) + self._base_anchor_size = base_anchor_size + self._anchor_strides = anchor_strides + self._anchor_offsets = anchor_offsets + if clip_window is not None and list(clip_window.shape) != [4]: + raise ValueError('clip_window must either be None or a shape [4] tensor') + self._clip_window = clip_window + self._scales = [] + self._aspect_ratios = [] + for box_spec in self._box_specs: + if not all([isinstance(entry, tuple) and len(entry) == 2 + for entry in box_spec]): + raise ValueError('box_specs_list is expected to be a ' + 'list of lists of pairs') + scales, aspect_ratios = zip(*box_spec) + self._scales.append(scales) + self._aspect_ratios.append(aspect_ratios) + + for arg, arg_name in zip([self._anchor_strides, self._anchor_offsets], + ['anchor_strides', 'anchor_offsets']): + if arg and not (isinstance(arg, list) and + len(arg) == len(self._box_specs)): + raise ValueError('%s must be a list with the same length ' + 'as self._box_specs' % arg_name) + if arg and not all([ + isinstance(list_item, tuple) and len(list_item) == 2 + for list_item in arg + ]): + raise ValueError('%s must be a list of pairs.' % arg_name) + + + def _generate(self, feature_map_shape_list, im_height=1, im_width=1): + """Generates a collection of bounding boxes to be used as anchors. + The number of anchors generated for a single grid with shape MxM where we + place k boxes over each grid center is k*M^2 and thus the total number of + anchors is the sum over all grids. In our box_specs_list example + (see the constructor docstring), we would place two boxes over each grid + point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and + thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the + output anchors follows the order of how the grid sizes and box_specs are + specified (with box_spec index varying the fastest, followed by width + index, then height index, then grid index). + Args: + feature_map_shape_list: list of pairs of convnet layer resolutions in the + format [(height_0, width_0), (height_1, width_1), ...]. For example, + setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that + correspond to an 8x8 layer followed by a 7x7 layer. + im_height: the height of the image to generate the grid for. If both + im_height and im_width are 1, the generated anchors default to + absolute coordinates, otherwise normalized coordinates are produced. + im_width: the width of the image to generate the grid for. If both + im_height and im_width are 1, the generated anchors default to + absolute coordinates, otherwise normalized coordinates are produced. + Returns: + boxes_list: a list of BoxLists each holding anchor boxes corresponding to + the input feature map shapes. + Raises: + ValueError: if feature_map_shape_list, box_specs_list do not have the same + length. 
+ ValueError: if feature_map_shape_list does not consist of pairs of + integers + """ + if not (isinstance(feature_map_shape_list, list) + and len(feature_map_shape_list) == len(self._box_specs)): + raise ValueError('feature_map_shape_list must be a list with the same ' + 'length as self._box_specs') + if not all([isinstance(list_item, tuple) and len(list_item) == 2 + for list_item in feature_map_shape_list]): + raise ValueError('feature_map_shape_list must be a list of pairs.') + + im_height = float(im_height) + im_width = float(im_width) + + if not self._anchor_strides: + anchor_strides = [(1.0 / float(pair[0]), 1.0 / float(pair[1])) + for pair in feature_map_shape_list] + else: + anchor_strides = [(float(stride[0]) / im_height, + float(stride[1]) / im_width) + for stride in self._anchor_strides] + if not self._anchor_offsets: + anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1]) + for stride in anchor_strides] + else: + anchor_offsets = [(float(offset[0]) / im_height, + float(offset[1]) / im_width) + for offset in self._anchor_offsets] + + for arg, arg_name in zip([anchor_strides, anchor_offsets], + ['anchor_strides', 'anchor_offsets']): + if not (isinstance(arg, list) and len(arg) == len(self._box_specs)): + raise ValueError('%s must be a list with the same length ' + 'as self._box_specs' % arg_name) + if not all([isinstance(list_item, tuple) and len(list_item) == 2 + for list_item in arg]): + raise ValueError('%s must be a list of pairs.' % arg_name) + + anchor_grid_list = [] + min_im_shape = min(im_height, im_width) + scale_height = min_im_shape / im_height + scale_width = min_im_shape / im_width + base_anchor_size = [ + scale_height * self._base_anchor_size[0], + scale_width * self._base_anchor_size[1] + ] + for feature_map_index, (grid_size, scales, aspect_ratios, stride, + offset) in enumerate( + zip(feature_map_shape_list, self._scales, + self._aspect_ratios, anchor_strides, + anchor_offsets)): + tiled_anchors = tile_anchors( + grid_height=grid_size[0], + grid_width=grid_size[1], + scales=scales, + aspect_ratios=aspect_ratios, + base_anchor_size=base_anchor_size, + anchor_stride=stride, + anchor_offset=offset) + if self._clip_window is not None: + raise NotImplementedError("Oups!") + num_anchors_in_layer = len(tiled_anchors) + anchor_indices = feature_map_index * torch.ones(num_anchors_in_layer) + anchor_grid_list.append(tiled_anchors) + + return anchor_grid_list diff --git a/retired_benchmarks/vision/classification_and_detection/python/models/base_model_r34.py b/retired_benchmarks/vision/classification_and_detection/python/models/base_model_r34.py new file mode 100644 index 000000000..ea224a7ca --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/models/base_model_r34.py @@ -0,0 +1,204 @@ +""" + Load the vgg16 weight and save it to special file +""" + +#from torchvision.models.vgg import vgg16 +import torch.nn as nn +import torch.nn.functional as F +import torch +from torch.autograd import Variable +from collections import OrderedDict + +from torchvision.models.resnet import resnet18, resnet34, resnet50 + +def _ModifyConvStrideDilation(conv, stride=(1, 1), padding=None): + conv.stride = stride + + if padding is not None: + conv.padding = padding + +def _ModifyBlock(block, bottleneck=False, **kwargs): + for m in list(block.children()): + if bottleneck: + _ModifyConvStrideDilation(m.conv2, **kwargs) + else: + _ModifyConvStrideDilation(m.conv1, **kwargs) + + if m.downsample is not None: + # need to make sure no padding for the 1x1 residual connection + 
_ModifyConvStrideDilation(list(m.downsample.children())[0], **kwargs) + +class ResNet18(nn.Module): + def __init__(self): + super().__init__() + rn18 = resnet18(pretrained=True) + + + # discard last Resnet block, avrpooling and classification FC + # layer1 = up to and including conv3 block + self.layer1 = nn.Sequential(*list(rn18.children())[:6]) + # layer2 = conv4 block only + self.layer2 = nn.Sequential(*list(rn18.children())[6:7]) + + # modify conv4 if necessary + # Always deal with stride in first block + modulelist = list(self.layer2.children()) + _ModifyBlock(modulelist[0], stride=(1,1)) + + def forward(self, data): + layer1_activation = self.layer1(data) + x = layer1_activation + layer2_activation = self.layer2(x) + + # Only need the output of conv4 + return [layer2_activation] + +class ResNet34(nn.Module): + def __init__(self): + super().__init__() + rn34 = resnet34(pretrained=True) + + # discard last Resnet block, avrpooling and classification FC + self.layer1 = nn.Sequential(*list(rn34.children())[:6]) + self.layer2 = nn.Sequential(*list(rn34.children())[6:7]) + # modify conv4 if necessary + # Always deal with stride in first block + modulelist = list(self.layer2.children()) + _ModifyBlock(modulelist[0], stride=(1,1)) + + + def forward(self, data): + layer1_activation = self.layer1(data) + x = layer1_activation + layer2_activation = self.layer2(x) + + return [layer2_activation] + +class L2Norm(nn.Module): + """ + Scale shall be learnable according to original paper + scale: initial scale number + chan_num: L2Norm channel number (norm over all channels) + """ + def __init__(self, scale=20, chan_num=512): + super(L2Norm, self).__init__() + # Scale across channels + self.scale = \ + nn.Parameter(torch.Tensor([scale]*chan_num).view(1, chan_num, 1, 1)) + + def forward(self, data): + # normalize accross channel + return self.scale*data*data.pow(2).sum(dim=1, keepdim=True).clamp(min=1e-12).rsqrt() + + + +def tailor_module(src_model, src_dir, tgt_model, tgt_dir): + state = torch.load(src_dir) + src_model.load_state_dict(state) + src_state = src_model.state_dict() + # only need features + keys1 = src_state.keys() + keys1 = [k for k in src_state.keys() if k.startswith("features")] + keys2 = tgt_model.state_dict().keys() + + assert len(keys1) == len(keys2) + state = OrderedDict() + + for k1, k2 in zip(keys1, keys2): + # print(k1, k2) + state[k2] = src_state[k1] + #diff_keys = state.keys() - target_model.state_dict().keys() + #print("Different Keys:", diff_keys) + # Remove unecessary keys + #for k in diff_keys: + # state.pop(k) + tgt_model.load_state_dict(state) + torch.save(tgt_model.state_dict(), tgt_dir) + +# Default vgg16 in pytorch seems different from ssd +def make_layers(cfg, batch_norm=False): + layers = [] + in_channels = 3 + for v in cfg: + if v == 'M': + layers += [nn.MaxPool2d(kernel_size=2, stride=2)] + elif v == 'C': + # Notice ceil_mode is true + layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)] + else: + conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1) + if batch_norm: + layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)] + else: + layers += [conv2d, nn.ReLU(inplace=True)] + in_channels = v + return layers + +class Loss(nn.Module): + """ + Implements the loss as the sum of the followings: + 1. Confidence Loss: All labels, with hard negative mining + 2. 
Localization Loss: Only on positive labels + Suppose input dboxes has the shape 8732x4 + """ + + def __init__(self, dboxes): + super(Loss, self).__init__() + self.scale_xy = 1.0/dboxes.scale_xy + self.scale_wh = 1.0/dboxes.scale_wh + + self.sl1_loss = nn.SmoothL1Loss(reduce=False) + self.dboxes = nn.Parameter(dboxes(order="xywh").transpose(0, 1).unsqueeze(dim = 0), + requires_grad=False) + # Two factor are from following links + # http://jany.st/post/2017-11-05-single-shot-detector-ssd-from-scratch-in-tensorflow.html + self.con_loss = nn.CrossEntropyLoss(reduce=False) + + def _loc_vec(self, loc): + """ + Generate Location Vectors + """ + gxy = self.scale_xy*(loc[:, :2, :] - self.dboxes[:, :2, :])/self.dboxes[:, 2:, ] + gwh = self.scale_wh*(loc[:, 2:, :]/self.dboxes[:, 2:, :]).log() + + return torch.cat((gxy, gwh), dim=1).contiguous() + + def forward(self, ploc, plabel, gloc, glabel): + """ + ploc, plabel: Nx4x8732, Nxlabel_numx8732 + predicted location and labels + + gloc, glabel: Nx4x8732, Nx8732 + ground truth location and labels + """ + + mask = glabel > 0 + pos_num = mask.sum(dim=1) + vec_gd = self._loc_vec(gloc) + # sum on four coordinates, and mask + sl1 = self.sl1_loss(ploc, vec_gd).sum(dim=1) + sl1 = (mask.float()*sl1).sum(dim=1) + + # hard negative mining + con = self.con_loss(plabel, glabel) + + # postive mask will never selected + con_neg = con.clone() + con_neg[mask] = 0 + _, con_idx = con_neg.sort(dim=1, descending=True) + _, con_rank = con_idx.sort(dim=1) + + # number of negative three times positive + neg_num = torch.clamp(3*pos_num, max=mask.size(1)).unsqueeze(-1) + neg_mask = con_rank < neg_num + + closs = (con*(mask.float() + neg_mask.float())).sum(dim=1) + + # avoid no object detected + total_loss = sl1 + closs + num_mask = (pos_num > 0).float() + pos_num = pos_num.float().clamp(min=1e-6) + + ret = (total_loss*num_mask/pos_num).mean(dim=0) + return ret + diff --git a/retired_benchmarks/vision/classification_and_detection/python/models/convert_tf_weights.py b/retired_benchmarks/vision/classification_and_detection/python/models/convert_tf_weights.py new file mode 100644 index 000000000..02c51bf1b --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/models/convert_tf_weights.py @@ -0,0 +1,137 @@ +import torch + +import re +from collections import OrderedDict + + +def remap_tf_base_names(orig_weights): + prefix = "backbone." + + # convs + weights = { + k: v for k, v in orig_weights.items() if "FeatureExtractor/MobilenetV1" in k + } + convs = { + k: v + for k, v in weights.items() + if "batchnorm" not in k and "pointwise_" not in k + } + + matcher = re.compile("(.*)Conv2d_(\d+)") + mapping = {} + for k in convs.keys(): + l = matcher.match(k).group(2) + name = "pointwise" if "pointwise" in k else "depthwise" + if l == "0": + name = "0" + mapping[k] = "{}{}.{}.weight".format(prefix, l, name) + + # batch norm + weights = { + k: v + for k, v in orig_weights.items() + if "FeatureExtractor/MobilenetV1/MobilenetV1" in k + } + weights = {k: v for k, v in weights.items() if "pointwise_" not in k} + for k in weights.keys(): + l = matcher.match(k).group(2) + name = "pointwise" if "pointwise" in k else "depthwise" + op = "scale" if "mul" in k else "bias" + if l == "0": + name = "0" + mapping[k] = "{}{}.{}/BatchNorm.{}".format(prefix, l, name, op) + + return mapping + + +def remap_tf_extras(orig_weights): + prefix = "extras." 
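remap_tf_base_names above maps TensorFlow checkpoint variable names onto the state-dict keys of the PyTorch MobileNet backbone. A tiny illustration of the regex step, using a plausible but hypothetical TF key rather than one taken from a real checkpoint:

import re

matcher = re.compile(r"(.*)Conv2d_(\d+)")
key = "FeatureExtractor/MobilenetV1/Conv2d_3_depthwise/weights"   # hypothetical key
layer = matcher.match(key).group(2)                               # "3"
name = "pointwise" if "pointwise" in key else "depthwise"
print("backbone.{}.{}.weight".format(layer, name))                # backbone.3.depthwise.weight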
+ + weights = { + k: v for k, v in orig_weights.items() if "FeatureExtractor/MobilenetV1" in k + } + weights = {k: v for k, v in weights.items() if "pointwise_" in k} + + matcher = re.compile("(.*)Conv2d_(\d+)_(\d)x(\d)") + mapping = {} + for k in weights.keys(): + m = matcher.match(k) + l = int(m.group(2)) - 2 + ks = int(m.group(3)) + if ks == 1: + pos = 0 + else: + pos = 2 + wtype = "weight" if "weight" in k else "bias" + mapping[k] = "{}{}.{}.{}".format(prefix, l, pos, wtype) + + return mapping + + +def remap_tf_predictors(orig_weights): + mapping = {} + + # regression + weights = {k: v for k, v in orig_weights.items() if "BoxPredictor" in k} + weights = {k: v for k, v in weights.items() if "BoxEncodingPredictor" in k} + + matcher = re.compile("BoxPredictor_(\d+)") + for k in weights.keys(): + pos = matcher.match(k).group(1) + wtype = "weight" if "weights" in k else "bias" + mapping[k] = "predictors.{}.regression.{}".format(pos, wtype) + + # classification + weights = {k: v for k, v in orig_weights.items() if "BoxPredictor" in k} + weights = {k: v for k, v in weights.items() if "ClassPredictor" in k} + + for k in weights.keys(): + pos = matcher.match(k).group(1) + wtype = "weight" if "weights" in k else "bias" + mapping[k] = "predictors.{}.classification.{}".format(pos, wtype) + + return mapping + + +def remap_tf_names(weights): + layers_base = remap_tf_base_names(weights) + layers_extra = remap_tf_extras(weights) + layers_predictors = remap_tf_predictors(weights) + + layers = {} + layers.update(layers_base) + layers.update(layers_extra) + layers.update(layers_predictors) + + return layers + + +def get_state_dict(weights): + layers = remap_tf_names(weights) + state_dict = OrderedDict() + + for orig, new in layers.items(): + weight = weights[orig] + weight = torch.as_tensor(weight, dtype=torch.float32) + if weight.dim() == 4: + p = (2, 3, 0, 1) + if "pointwise" in orig or "backbone.0." 
in new or "BoxPredictor" in orig: + p = (3, 2, 0, 1) + weight = weight.permute(*p).contiguous() + state_dict[new] = weight + return state_dict + + +def read_tf_weights(frozen_model): + import tensorflow as tf + from tensorflow.python.framework import tensor_util + weights = {} + with tf.Session() as sess: + with tf.gfile.GFile(frozen_model, 'rb') as f: + graph_def = tf.GraphDef() + graph_def.ParseFromString(f.read()) + tf.import_graph_def(graph_def) + for n in graph_def.node: + if n.op == 'Const': + weights[n.name] = tensor_util.MakeNdarray(n.attr['value'].tensor) + return weights diff --git a/retired_benchmarks/vision/classification_and_detection/python/models/ssd_mobilenet_v1.py b/retired_benchmarks/vision/classification_and_detection/python/models/ssd_mobilenet_v1.py new file mode 100644 index 000000000..dc77808c7 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/models/ssd_mobilenet_v1.py @@ -0,0 +1,253 @@ +from collections import OrderedDict + +import torch +from torch import nn +from torch.nn import functional as F + +from models.anchor_generator import create_ssd_anchors +from models.utils import Conv2d_tf +from models.utils import BatchNorm2d +from models.utils import BiasAdd +from models.utils import nms +from models.utils import decode_boxes + + +def conv_bn(inp, oup, stride): + return nn.Sequential( + OrderedDict( + [ + ("0", Conv2d_tf(inp, oup, 3, stride, padding="SAME", bias=False)), + ("0/BatchNorm", BiasAdd(oup)), + ("0/ReLU", nn.ReLU6(inplace=True)), + ] + ) + ) + + +def conv_dw(inp, oup, stride): + return nn.Sequential( + OrderedDict( + [ + ( + "depthwise", + Conv2d_tf( + inp, inp, 3, stride, padding="SAME", groups=inp, bias=False + ), + ), + ("depthwise/BatchNorm", BatchNorm2d(inp)), + ("depthwise/ReLU", nn.ReLU6(inplace=True)), + ("pointwise", nn.Conv2d(inp, oup, 1, 1, 0, bias=False)), + ("pointwise/BatchNorm", BiasAdd(oup)), + ("pointwise/ReLU", nn.ReLU6(inplace=True)), + ] + ) + ) + + +class MobileNetV1Base(nn.ModuleList): + def __init__(self, return_layers=[11, 13]): + super(MobileNetV1Base, self).__init__( + [ + conv_bn(3, 32, 2), + conv_dw(32, 64, 1), + conv_dw(64, 128, 2), + conv_dw(128, 128, 1), + conv_dw(128, 256, 2), + conv_dw(256, 256, 1), + conv_dw(256, 512, 2), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 1024, 2), + conv_dw(1024, 1024, 1), + ] + ) + self.return_layers = return_layers + + def forward(self, x): + out = [] + for idx, module in enumerate(self): + x = module(x) + if idx in self.return_layers: + out.append(x) + return out + + +class PredictionHead(nn.Module): + def __init__(self, in_channels, num_classes, num_anchors): + super(PredictionHead, self).__init__() + self.classification = nn.Conv2d( + in_channels, num_classes * num_anchors, kernel_size=1 + ) + self.regression = nn.Conv2d(in_channels, num_anchors * 4, kernel_size=1) + + self.num_classes = num_classes + self.num_anchors = num_anchors + + def forward(self, x): + bs = x.shape[0] + class_logits = self.classification(x) + box_regression = self.regression(x) + + class_logits = class_logits.permute(0, 2, 3, 1).reshape( + bs, -1, self.num_classes + ) + box_regression = box_regression.permute(0, 2, 3, 1).reshape(bs, -1, 4) + + return class_logits, box_regression + + +class Block(nn.Sequential): + def __init__(self, in_channels, mid_channels, out_channels): + super(Block, self).__init__( + nn.Conv2d(in_channels, out_channels=mid_channels, kernel_size=1), + nn.ReLU6(), + 
Conv2d_tf( + mid_channels, out_channels, kernel_size=3, stride=2, padding="SAME" + ), + nn.ReLU6(), + ) + + +class SSD(nn.Module): + def __init__(self, backbone, predictors, extras): + super(SSD, self).__init__() + + self.backbone = backbone + self.extras = extras + self.predictors = predictors + + # preprocess + self.image_size = 300 + self.image_mean = 127.5 + self.image_std = 127.5 + + self.coder_weights = torch.tensor((10, 10, 5, 5), dtype=torch.float32) + self._feature_map_shapes = None + + # postprocess + self.nms_threshold = 0.6 + + # set it to 0.01 for better results but slower runtime + self.score_threshold = 0.3 + + def ssd_model(self, x): + feature_maps = self.backbone(x) + + out = feature_maps[-1] + for module in self.extras: + out = module(out) + feature_maps.append(out) + + results = [] + for feature, module in zip(feature_maps, self.predictors): + results.append(module(feature)) + + class_logits, box_regression = list(zip(*results)) + class_logits = torch.cat(class_logits, 1) + box_regression = torch.cat(box_regression, 1) + + scores = torch.sigmoid(class_logits) + box_regression = box_regression.squeeze(0) + + shapes = [o.shape[-2:] for o in feature_maps] + if shapes != self._feature_map_shapes: + # generate anchors for the sizes of the feature map + priors = create_ssd_anchors()._generate(shapes) + priors = torch.cat(priors, dim=0) + self.priors = priors.to(scores) + self._feature_map_shapes = shapes + + self.coder_weights = self.coder_weights.to(scores) + if box_regression.dim()==2: + box_regression = box_regression[None] + boxes = decode_boxes(box_regression, self.priors, self.coder_weights) + # add a batch dimension + return scores, boxes + + + def forward(self, images): + """ + Arguments: + images (torch.Tensor[N,C,H,W]): + """ + + scores, boxes = self.ssd_model(images) + list_boxes=[]; list_labels=[]; list_scores=[] + for b in range(len(scores)): + bboxes, blabels, bscores = self.filter_results(scores[b], boxes[b]) + list_boxes.append(bboxes) + list_labels.append(blabels.long()) + list_scores.append(bscores) + #boxes = self.rescale_boxes(boxes, height, width) + return [list_boxes, list_labels, list_scores] + + def filter_results(self, scores, boxes): + # in order to avoid custom C++ extensions + # we use an NMS implementation written purely + # on python. 
This implementation is faster on the + # CPU, which is why we run this part on the CPU + cpu_device = torch.device("cpu") + #boxes = boxes[0] + #scores = scores[0] + boxes = boxes.to(cpu_device) + scores = scores.to(cpu_device) + selected_box_probs = [] + labels = [] + for class_index in range(1, scores.size(1)): + probs = scores[:, class_index] + mask = probs > self.score_threshold + probs = probs[mask] + subset_boxes = boxes[mask, :] + box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1) + box_probs = nms(box_probs, self.nms_threshold) + selected_box_probs.append(box_probs) + labels.append( + torch.full((box_probs.size(0),), class_index, dtype=torch.int64) + ) + selected_box_probs = torch.cat(selected_box_probs) + labels = torch.cat(labels) + return selected_box_probs[:, :4], labels, selected_box_probs[:, 4] + + def rescale_boxes(self, boxes, height, width): + boxes[:, 0] *= width + boxes[:, 1] *= height + boxes[:, 2] *= width + boxes[:, 3] *= height + return boxes + + +def create_mobilenetv1_ssd(num_classes): + backbone = MobileNetV1Base() + + extras = nn.ModuleList( + [ + Block(1024, 256, 512), + Block(512, 128, 256), + Block(256, 128, 256), + Block(256, 64, 128), + ] + ) + + predictors = nn.ModuleList( + [ + PredictionHead(in_channels, num_classes, num_anchors) + for in_channels, num_anchors in zip( + (512, 1024, 512, 256, 256, 128), (3, 6, 6, 6, 6, 6) + ) + ] + ) + + return SSD(backbone, predictors, extras) + + +def get_tf_pretrained_mobilenet_ssd(weights_file): + from models.convert_tf_weights import get_state_dict, read_tf_weights + + model = create_mobilenetv1_ssd(91) + weights = read_tf_weights(weights_file) + state_dict = get_state_dict(weights) + model.load_state_dict(state_dict) + return model diff --git a/retired_benchmarks/vision/classification_and_detection/python/models/ssd_r34.py b/retired_benchmarks/vision/classification_and_detection/python/models/ssd_r34.py new file mode 100644 index 000000000..63e596b4a --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/models/ssd_r34.py @@ -0,0 +1,351 @@ +import torch +import torch.nn as nn +from models.base_model_r34 import ResNet34 +import numpy as np +from math import sqrt, ceil +import itertools +import torch.nn.functional as F + +##Inspired by https://github.com/kuangliu/pytorch-ssd + +class Encoder(object): + """ + Transform between (bboxes, lables) <-> SSD output + + dboxes: default boxes in size 8732 x 4, + encoder: input ltrb format, output xywh format + decoder: input xywh format, output ltrb format + + decode: + input : bboxes_in (Tensor 8732 x 4), scores_in (Tensor 8732 x nitems) + output : bboxes_out (Tensor nboxes x 4), labels_out (Tensor nboxes) + criteria : IoU threshold of bboexes + max_output : maximum number of output bboxes + """ + + def __init__(self, dboxes): + self.dboxes = dboxes(order="ltrb") + self.dboxes_xywh = dboxes(order="xywh").unsqueeze(dim=0) + self.nboxes = self.dboxes.size(0) + #print("# Bounding boxes: {}".format(self.nboxes)) + self.scale_xy = torch.tensor(dboxes.scale_xy) + self.scale_wh = torch.tensor(dboxes.scale_wh) + + + def decode_batch(self, bboxes_in, scores_in, criteria = 0.45, max_output=200): + self.dboxes = self.dboxes.to(bboxes_in) + self.dboxes_xywh = self.dboxes_xywh.to(bboxes_in) + bboxes, probs = scale_back_batch(bboxes_in, scores_in,self.scale_xy,self.scale_wh,self.dboxes_xywh) + boxes = []; labels=[]; scores=[] + for bbox, prob in zip(bboxes.split(1, 0), probs.split(1, 0)): + bbox = bbox.squeeze(0) + prob = prob.squeeze(0) + 
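# Illustrative sketch (hand-written example): the greedy, score-ordered suppression that
# decode_single() below implements, reduced to a few lines.  torch and calc_iou_tensor()
# come from the imports/definitions in this file; the toy boxes are in ltrb form.
def _nms_sketch(boxes, scores, iou_threshold=0.45):
    """Keep the best-scoring box, drop overlaps above the threshold, repeat."""
    keep = []
    _, order = scores.sort(descending=True)
    while order.numel() > 0:
        top = order[0]
        keep.append(top.item())
        if order.numel() == 1:
            break
        ious = calc_iou_tensor(boxes[order[1:], :], boxes[top, :].unsqueeze(0)).squeeze(1)
        order = order[1:][ious < iou_threshold]
    return keep

# Example: boxes 0 and 1 overlap heavily, box 2 is disjoint, so boxes 0 and 2 survive:
#   _nms_sketch(torch.tensor([[0., 0., 1., 1.], [0., 0., .9, .9], [2., 2., 3., 3.]]),
#               torch.tensor([0.9, 0.8, 0.7]))   # -> [0, 2]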
dbox,dlabel,dscore=self.decode_single(bbox, prob, criteria, max_output) + boxes.append(dbox) + labels.append(dlabel) + scores.append(dscore) + + return [boxes,labels,scores] + + # perform non-maximum suppression + def decode_single(self, bboxes_in, scores_in, criteria, max_output, max_num=200): + # Reference to https://github.com/amdegroot/ssd.pytorch + + bboxes_out = [] + scores_out = [] + labels_out = [] + + for i, score in enumerate(scores_in.split(1, 1)): + # skip background + if i == 0: continue + + score = score.squeeze(1) + mask = score > 0.05 + + bboxes, score = bboxes_in[mask, :], score[mask] + if score.size(0) == 0: continue + + score_sorted, score_idx_sorted = score.sort(dim=0) + + # select max_output indices + score_idx_sorted = score_idx_sorted[-max_num:] + candidates = [] + + while score_idx_sorted.numel() > 0: + idx = score_idx_sorted[-1].item() + bboxes_sorted = bboxes[score_idx_sorted, :] + bboxes_idx = bboxes[idx, :].unsqueeze(dim=0) + iou_sorted = calc_iou_tensor(bboxes_sorted, bboxes_idx).squeeze() + # we only need iou < criteria + score_idx_sorted = score_idx_sorted[iou_sorted < criteria] + candidates.append(idx) + + bboxes_out.append(bboxes[candidates, :]) + scores_out.append(score[candidates]) + labels_out.extend([i]*len(candidates)) + + bboxes_out, labels_out, scores_out = torch.cat(bboxes_out, dim=0), \ + torch.tensor(labels_out, dtype=torch.long), \ + torch.cat(scores_out, dim=0) + + + _, max_ids = scores_out.sort(dim=0) + max_ids = max_ids[-max_output:] + return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids] + +@torch.jit.script +def calc_iou_tensor(box1, box2): + """ Calculation of IoU based on two boxes tensor, + Reference to https://github.com/kuangliu/pytorch-ssd + input: + box1 (N, 4) + box2 (M, 4) + output: + IoU (N, M) + """ + N = box1.size(0) + M = box2.size(0) + + be1 = box1.unsqueeze(1).expand(-1, M, -1) + be2 = box2.unsqueeze(0).expand(N, -1, -1) + + # Left Top & Right Bottom + lt = torch.max(be1[:,:,:2], be2[:,:,:2]) + rb = torch.min(be1[:,:,2:], be2[:,:,2:]) + delta = rb - lt + delta.clone().masked_fill_(delta < 0,0) + intersect = delta[:,:,0]*delta[:,:,1] + delta1 = be1[:,:,2:] - be1[:,:,:2] + area1 = delta1[:,:,0]*delta1[:,:,1] + delta2 = be2[:,:,2:] - be2[:,:,:2] + area2 = delta2[:,:,0]*delta2[:,:,1] + + iou = intersect/(area1 + area2 - intersect) + return iou + +@torch.jit.script +def scale_back_batch(bboxes_in, scores_in,scale_xy,scale_wh,dboxes_xywh): + """ + Do scale and transform from xywh to ltrb + suppose input Nx4xnum_bbox Nxlabel_numxnum_bbox + """ + bboxes_in = bboxes_in.permute(0, 2, 1) + scores_in = scores_in.permute(0, 2, 1) + + bboxes_in[:, :, :2] = scale_xy*bboxes_in[:, :, :2] + bboxes_in[:, :, 2:] = scale_wh*bboxes_in[:, :, 2:] + bboxes_in[:, :, :2] = bboxes_in[:, :, :2]*dboxes_xywh[:, :, 2:] + dboxes_xywh[:, :, :2] + bboxes_in[:, :, 2:] = bboxes_in[:, :, 2:].exp()*dboxes_xywh[:, :, 2:] + # Transform format to ltrb + l, t, r, b = bboxes_in[:, :, 0] - 0.5*bboxes_in[:, :, 2],\ + bboxes_in[:, :, 1] - 0.5*bboxes_in[:, :, 3],\ + bboxes_in[:, :, 0] + 0.5*bboxes_in[:, :, 2],\ + bboxes_in[:, :, 1] + 0.5*bboxes_in[:, :, 3] + bboxes_in[:, :, 0] = l + bboxes_in[:, :, 1] = t + bboxes_in[:, :, 2] = r + bboxes_in[:, :, 3] = b + return bboxes_in, F.softmax(scores_in, dim=-1) + + +class DefaultBoxes(object): + def __init__(self, fig_size, feat_size, steps, scales, aspect_ratios, \ + scale_xy=0.1, scale_wh=0.2): + + self.feat_size = feat_size + self.fig_size_w,self.fig_size_h = fig_size + + self.scale_xy_ = scale_xy + 
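# Illustrative sketch (hand-picked numbers): the per-prior box decoding performed by
# scale_back_batch() above.  The raw offsets (dx, dy, dw, dh) are scaled by
# scale_xy / scale_wh (0.1 / 0.2, the DefaultBoxes defaults), applied to the prior's
# centre and size, and the result is converted from xywh to ltrb.
def _decode_one_prior_sketch():
    import math
    sxy, swh = 0.1, 0.2                                              # scale_xy, scale_wh
    prior_cx, prior_cy, prior_w, prior_h = 0.50, 0.50, 0.20, 0.20    # one prior, xywh
    dx, dy, dw, dh = 1.0, -1.0, 0.5, 0.0                             # raw network output
    cx = sxy * dx * prior_w + prior_cx                               # 0.52
    cy = sxy * dy * prior_h + prior_cy                               # 0.48
    w = math.exp(swh * dw) * prior_w                                 # ~0.221
    h = math.exp(swh * dh) * prior_h                                 # 0.20
    # xywh -> ltrb, as in the tail of scale_back_batch()
    return cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h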
self.scale_wh_ = scale_wh + + # According to https://github.com/weiliu89/caffe + # Calculation method slightly different from paper + self.steps_w = [st[0] for st in steps] + self.steps_h = [st[1] for st in steps] + self.scales = scales + fkw = self.fig_size_w//np.array(self.steps_w) + fkh = self.fig_size_h//np.array(self.steps_h) + self.aspect_ratios = aspect_ratios + + self.default_boxes = [] + # size of feature and number of feature + for idx, sfeat in enumerate(self.feat_size): + sfeat_w,sfeat_h=sfeat + sk1 = scales[idx][0]/self.fig_size_w + sk2 = scales[idx+1][1]/self.fig_size_h + sk3 = sqrt(sk1*sk2) + all_sizes = [(sk1, sk1), (sk3, sk3)] + for alpha in aspect_ratios[idx]: + w, h = sk1*sqrt(alpha), sk1/sqrt(alpha) + all_sizes.append((w, h)) + all_sizes.append((h, w)) + for w, h in all_sizes: + for i, j in itertools.product(range(sfeat_w), range(sfeat_h)): + cx, cy = (j+0.5)/fkh[idx], (i+0.5)/fkw[idx] + self.default_boxes.append((cx, cy, w, h)) + self.dboxes = torch.tensor(self.default_boxes) + self.dboxes.clamp_(min=0, max=1) + # For IoU calculation + self.dboxes_ltrb = self.dboxes.clone() + self.dboxes_ltrb[:, 0] = self.dboxes[:, 0] - 0.5*self.dboxes[:, 2] + self.dboxes_ltrb[:, 1] = self.dboxes[:, 1] - 0.5*self.dboxes[:, 3] + self.dboxes_ltrb[:, 2] = self.dboxes[:, 0] + 0.5*self.dboxes[:, 2] + self.dboxes_ltrb[:, 3] = self.dboxes[:, 1] + 0.5*self.dboxes[:, 3] + + @property + def scale_xy(self): + return self.scale_xy_ + + @property + def scale_wh(self): + return self.scale_wh_ + + def __call__(self, order="ltrb"): + if order == "ltrb": return self.dboxes_ltrb + if order == "xywh": return self.dboxes + +def dboxes_R34_coco(figsize,strides): + feat_size = [[50, 50], [25, 25], [13, 13], [7, 7], [3, 3], [3, 3]] + steps=[(int(figsize[0]/fs[0]),int(figsize[1]/fs[1])) for fs in feat_size] + scales = [(int(s*figsize[0]/300),int(s*figsize[1]/300)) for s in [21, 45, 99, 153, 207, 261, 315]] + aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]] + dboxes = DefaultBoxes(figsize, feat_size, steps, scales, aspect_ratios) + return dboxes + +class SSD_R34(nn.Module): + """ + Build a SSD module to take 300x300 image input, + and output 8732 per class bounding boxes + + vggt: pretrained vgg16 (partial) model + label_num: number of classes (including background 0) + """ + def __init__(self, label_num=81, backbone='resnet34', model_path="./resnet34-333f7ec4.pth",strides=[3,3 ,2 ,2 ,2 ,2],extract_shapes=False): + + super(SSD_R34, self).__init__() + + self.label_num = label_num + self.strides = strides + if backbone == 'resnet34': + self.model = ResNet34() + out_channels = 256 + self.out_chan = [out_channels, 512, 512, 256, 256, 256] + else: + raise ValueError('Invalid backbone chosen') + + self._build_additional_features(self.out_chan) + self.extract_shapes=extract_shapes + # after l2norm, conv7, conv8_2, conv9_2, conv10_2, conv11_2 + # classifer 1, 2, 3, 4, 5 ,6 + + self.num_defaults = [4, 6, 6, 6, 4, 4] + self.loc = [] + self.conf = [] + for nd, oc in zip(self.num_defaults, self.out_chan): + self.loc.append(nn.Conv2d(oc, nd*4, kernel_size=3, padding=1,stride=self.strides[0])) + self.conf.append(nn.Conv2d(oc, nd*label_num, kernel_size=3, padding=1,stride=self.strides[1])) + + self.loc = nn.ModuleList(self.loc) + self.conf = nn.ModuleList(self.conf) + if not extract_shapes: + self.size=(1200,1200) + dboxes = dboxes_R34_coco(list(self.size),[3,3,2,2,2,2]) + self.encoder = Encoder(dboxes) + # intitalize all weights + self._init_weights() + self.device = 1 + def _build_additional_features(self, 
input_channels): + idx = 0 + self.additional_blocks = [] + + self.additional_blocks.append(nn.Sequential( + nn.Conv2d(input_channels[idx], 256, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, input_channels[idx+1], kernel_size=3, padding=1,stride=self.strides[2]), + nn.ReLU(inplace=True), + )) + idx += 1 + + self.additional_blocks.append(nn.Sequential( + nn.Conv2d(input_channels[idx], 256, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(256, input_channels[idx+1], kernel_size=3, padding=1, stride=self.strides[3]), + nn.ReLU(inplace=True), + )) + idx += 1 + + # conv9_1, conv9_2 + self.additional_blocks.append(nn.Sequential( + nn.Conv2d(input_channels[idx], 128, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, input_channels[idx+1], kernel_size=3, padding=1, stride=self.strides[4]), + nn.ReLU(inplace=True), + )) + idx += 1 + + # conv10_1, conv10_2 + self.additional_blocks.append(nn.Sequential( + nn.Conv2d(input_channels[idx], 128, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, input_channels[idx+1], kernel_size=3,stride=self.strides[5]), + nn.ReLU(inplace=True), + )) + idx += 1 + + + + # conv11_1, conv11_2 + self.additional_blocks.append(nn.Sequential( + nn.Conv2d(input_channels[idx], 128, kernel_size=1), + nn.ReLU(inplace=True), + nn.Conv2d(128, input_channels[idx+1], kernel_size=3), + nn.ReLU(inplace=True), + )) + + self.additional_blocks = nn.ModuleList(self.additional_blocks) + + def _init_weights(self): + + layers = [ + *self.additional_blocks, + *self.loc, *self.conf] + + for layer in layers: + for param in layer.parameters(): + if param.dim() > 1: nn.init.xavier_uniform_(param) + + # Shape the classifier to the view of bboxes + def bbox_view(self, src, loc, conf,extract_shapes=False): + ret = [] + features_shapes = [] + for s, l, c in zip(src, loc, conf): + ret.append((l(s).view(s.size(0), 4, -1), c(s).view(s.size(0), self.label_num, -1))) + # extract shapes for prior box initliziation + if extract_shapes: + ls=l(s) + features_shapes.append([ls.shape[2],ls.shape[3]]) + locs, confs = list(zip(*ret)) + locs, confs = torch.cat(locs, 2).contiguous(), torch.cat(confs, 2).contiguous() + return locs, confs,features_shapes + + def forward(self, data): + layers = self.model(data) + + # last result from network goes into additional blocks + x = layers[-1] + + additional_results = [] + for i, l in enumerate(self.additional_blocks): + + x = l(x) + additional_results.append(x) + + src = [*layers, *additional_results] + # Feature maps sizes depend on the image size. 
For 300x300 with strides=[1,1,2,2,2,1] it is 38x38x4, 19x19x6, 10x10x6, 5x5x6, 3x3x4, 1x1x4 + locs, confs,features_shapes = self.bbox_view(src, self.loc, self.conf,extract_shapes=self.extract_shapes) + if self.extract_shapes: + return locs, confs,features_shapes + else: + # For SSD 300 with strides=[1,1,2,2,2,1] , shall return nbatch x 8732 x {nlabels, nlocs} results + results=self.encoder.decode_batch(locs, confs, 0.50, 200) #[0] + return results #locs, confs,features_shapes diff --git a/retired_benchmarks/vision/classification_and_detection/python/models/utils.py b/retired_benchmarks/vision/classification_and_detection/python/models/utils.py new file mode 100644 index 000000000..940722075 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/models/utils.py @@ -0,0 +1,198 @@ +import torch +import math + +from torch import nn +from torch.nn import functional as F + + +class BatchNorm2d(nn.Module): + """ + Fixed version of BatchNorm2d, which has only the scale and bias + """ + + def __init__(self, out): + super(BatchNorm2d, self).__init__() + self.register_buffer("scale", torch.ones(out)) + self.register_buffer("bias", torch.zeros(out)) + + #@torch.jit.script_method + def forward(self, x): + scale = self.scale.view(1, -1, 1, 1) + bias = self.bias.view(1, -1, 1, 1) + return x * scale + bias + + +class BiasAdd(nn.Module): + """ + Fixed version of BatchNorm2d, which has only the scale and bias + """ + + def __init__(self, out): + super(BiasAdd, self).__init__() + self.register_buffer("bias", torch.zeros(out)) + + #@torch.jit.script_method + def forward(self, x): + bias = self.bias.view(1, -1, 1, 1) + return x + bias + + +class Conv2d_tf(nn.Conv2d): + """ + Conv2d with the padding behavior from TF + """ + + def __init__(self, *args, **kwargs): + super(Conv2d_tf, self).__init__(*args, **kwargs) + self.padding = kwargs.get("padding", "SAME") + + def _compute_padding(self, input, dim): + input_size = input.size(dim + 2) + filter_size = self.weight.size(dim + 2) + effective_filter_size = (filter_size - 1) * self.dilation[dim] + 1 + out_size = (input_size + self.stride[dim] - 1) // self.stride[dim] + total_padding = max( + 0, (out_size - 1) * self.stride[dim] + effective_filter_size - input_size + ) + additional_padding = int(total_padding % 2 != 0) + + return additional_padding, total_padding + + def forward(self, input): + #import pdb; pdb.set_trace() + if self.padding == "VALID": + return F.conv2d( + input, + self.weight, + self.bias, + self.stride, + padding=0, + dilation=self.dilation, + groups=self.groups, + ) + rows_odd, padding_rows = self._compute_padding(input, dim=0) + cols_odd, padding_cols = self._compute_padding(input, dim=1) + if rows_odd or cols_odd: + input = F.pad(input, [0, cols_odd, 0, rows_odd]) + + return F.conv2d( + input, + self.weight, + self.bias, + self.stride, + padding=(padding_rows // 2, padding_cols // 2), + dilation=self.dilation, + groups=self.groups, + ) + + +def box_area(left_top, right_bottom): + """Compute the areas of rectangles given two corners. + + Args: + left_top (N, 2): left top corner. + right_bottom (N, 2): right bottom corner. + + Returns: + area (N): return the area. + """ + hw = torch.clamp(right_bottom - left_top, min=0.0) + return hw[..., 0] * hw[..., 1] + + +def box_iou(boxes0, boxes1, eps=1e-5): + """Return intersection-over-union (Jaccard index) of boxes. + + Args: + boxes0 (N, 4): ground truth boxes. + boxes1 (N or 1, 4): predicted boxes. + eps: a small number to avoid 0 as denominator. 
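        Example (illustrative, hand-computed values):
            box_iou(torch.tensor([[0., 0., 2., 2.]]),
                    torch.tensor([[1., 1., 3., 3.]]))
            # overlap = 1, union = 4 + 4 - 1 = 7  ->  IoU ~= 0.143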
+ Returns: + iou (N): IoU values. + """ + overlap_left_top = torch.max(boxes0[..., :2], boxes1[..., :2]) + overlap_right_bottom = torch.min(boxes0[..., 2:], boxes1[..., 2:]) + + overlap_area = box_area(overlap_left_top, overlap_right_bottom) + area0 = box_area(boxes0[..., :2], boxes0[..., 2:]) + area1 = box_area(boxes1[..., :2], boxes1[..., 2:]) + return overlap_area / (area0 + area1 - overlap_area + eps) + + +def nms(box_scores, iou_threshold): + """ + + Args: + box_scores (N, 5): boxes in corner-form and probabilities. + iou_threshold: intersection over union threshold. + Returns: + picked: a list of indexes of the kept boxes + """ + scores = box_scores[:, -1] + boxes = box_scores[:, :-1] + picked = [] + _, indexes = scores.sort(descending=True) + while len(indexes) > 0: + current = indexes[0] + picked.append(current.item()) + if len(indexes) == 1: + break + current_box = boxes[current, :] + indexes = indexes[1:] + rest_boxes = boxes[indexes, :] + iou = box_iou(rest_boxes, current_box.unsqueeze(0)) + indexes = indexes[iou <= iou_threshold] + + return box_scores[picked, :] + + +@torch.jit.script +def decode_boxes(rel_codes, boxes, weights): + # type: (torch.Tensor, torch.Tensor, torch.Tensor) -> torch.Tensor + + # perform some unpacking to make it JIT-fusion friendly + + #rel_codes=rel_codes[0][None] + wx = weights[1] + wy = weights[0] + ww = weights[3] + wh = weights[2] + + boxes_x1 = boxes[:, 1].unsqueeze(1).unsqueeze(0) + boxes_y1 = boxes[:, 0].unsqueeze(1).unsqueeze(0) + boxes_x2 = boxes[:, 3].unsqueeze(1).unsqueeze(0) + boxes_y2 = boxes[:, 2].unsqueeze(1).unsqueeze(0) + + dx = rel_codes[:,:, 1].unsqueeze(2) + dy = rel_codes[:,:, 0].unsqueeze(2) + dw = rel_codes[:,:, 3].unsqueeze(2) + dh = rel_codes[:,:, 2].unsqueeze(2) + + # implementation starts here + widths = boxes_x2 - boxes_x1 + heights = boxes_y2 - boxes_y1 + ctr_x = boxes_x1 + 0.5 * widths + ctr_y = boxes_y1 + 0.5 * heights + + dx = dx / wx + dy = dy / wy + dw = dw / ww + dh = dh / wh + + pred_ctr_x = dx * widths + ctr_x + #import pdb; pdb.set_trace() + pred_ctr_y = dy * heights + ctr_y + pred_w = torch.exp(dw) * widths + pred_h = torch.exp(dh) * heights + + pred_boxes = torch.cat( + [ + pred_ctr_x - 0.5 * pred_w, + pred_ctr_y - 0.5 * pred_h, + pred_ctr_x + 0.5 * pred_w, + pred_ctr_y + 0.5 * pred_h, + ], + dim=2, + ) + #import pdb; pdb.set_trace() + return pred_boxes diff --git a/retired_benchmarks/vision/classification_and_detection/python/pycoco.py b/retired_benchmarks/vision/classification_and_detection/python/pycoco.py new file mode 100644 index 000000000..931863569 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/pycoco.py @@ -0,0 +1,433 @@ +__author__ = 'tylin' +__version__ = '2.0' +# Interface for accessing the Microsoft COCO dataset. + +# Microsoft COCO is a large image dataset designed for object detection, +# segmentation, and caption generation. pycocotools is a Python API that +# assists in loading, parsing and visualizing the annotations in COCO. +# Please visit http://mscoco.org/ for more information on COCO, including +# for the data, paper, and tutorials. The exact format of the annotations +# is also described on the COCO website. For example usage of the pycocotools +# please see pycocotools_demo.ipynb. In addition to this API, please download both +# the COCO images and annotations in order to run the demo. + +# An alternative to using the API is to load the annotations directly +# into Python dictionary +# Using the API provides additional utility functions. 
Note that this API +# supports both *instance* and *caption* annotations. In the case of +# captions not all functions are defined (e.g. categories are undefined). + +# The following API functions are defined: +# COCO - COCO api class that loads COCO annotation file and prepare data structures. +# decodeMask - Decode binary mask M encoded via run-length encoding. +# encodeMask - Encode binary mask M using run-length encoding. +# getAnnIds - Get ann ids that satisfy given filter conditions. +# getCatIds - Get cat ids that satisfy given filter conditions. +# getImgIds - Get img ids that satisfy given filter conditions. +# loadAnns - Load anns with the specified ids. +# loadCats - Load cats with the specified ids. +# loadImgs - Load imgs with the specified ids. +# annToMask - Convert segmentation in an annotation to binary mask. +# showAnns - Display the specified annotations. +# loadRes - Load algorithm results and create API for accessing them. +# download - Download COCO images from mscoco.org server. +# Throughout the API "ann"=annotation, "cat"=category, and "img"=image. +# Help on each functions can be accessed by: "help COCO>function". + +# See also COCO>decodeMask, +# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds, +# COCO>getImgIds, COCO>loadAnns, COCO>loadCats, +# COCO>loadImgs, COCO>annToMask, COCO>showAnns + +# Microsoft COCO Toolbox. version 2.0 +# Data, paper, and tutorials available at: http://mscoco.org/ +# Code written by Piotr Dollar and Tsung-Yi Lin, 2014. +# Licensed under the Simplified BSD License [see bsd.txt] + +import json +import time +#import matplotlib.pyplot as plt +#from matplotlib.collections import PatchCollection +#from matplotlib.patches import Polygon +import numpy as np +import copy +import itertools +from pycocotools import mask as maskUtils +import os +from collections import defaultdict +import sys +PYTHON_VERSION = sys.version_info[0] +if PYTHON_VERSION == 2: + from urllib import urlretrieve +elif PYTHON_VERSION == 3: + from urllib.request import urlretrieve + + +def _isArrayLike(obj): + return hasattr(obj, '__iter__') and hasattr(obj, '__len__') + + +class COCO: + def __init__(self, annotation_file=None): + """ + Constructor of Microsoft COCO helper class for reading and visualizing annotations. + :param annotation_file (str): location of annotation file + :param image_folder (str): location to the folder that hosts images. 
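        Example (illustrative; the annotation path is a placeholder):
            coco = COCO('annotations/instances_val2017.json')
            img_ids = coco.getImgIds()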
+ :return: + """ + # load dataset + self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict() + self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list) + if not annotation_file == None: + print('loading annotations into memory...') + tic = time.time() + dataset = json.load(open(annotation_file, 'r')) + assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset)) + print('Done (t={:0.2f}s)'.format(time.time()- tic)) + self.dataset = dataset + self.createIndex() + + def createIndex(self): + # create index + print('creating index...') + anns, cats, imgs = {}, {}, {} + imgToAnns,catToImgs = defaultdict(list),defaultdict(list) + if 'annotations' in self.dataset: + for ann in self.dataset['annotations']: + imgToAnns[ann['image_id']].append(ann) + anns[ann['id']] = ann + + if 'images' in self.dataset: + for img in self.dataset['images']: + imgs[img['id']] = img + + if 'categories' in self.dataset: + for cat in self.dataset['categories']: + cats[cat['id']] = cat + + if 'annotations' in self.dataset and 'categories' in self.dataset: + for ann in self.dataset['annotations']: + catToImgs[ann['category_id']].append(ann['image_id']) + + print('index created!') + + # create class members + self.anns = anns + self.imgToAnns = imgToAnns + self.catToImgs = catToImgs + self.imgs = imgs + self.cats = cats + + def info(self): + """ + Print information about the annotation file. + :return: + """ + for key, value in self.dataset['info'].items(): + print('{}: {}'.format(key, value)) + + def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): + """ + Get ann ids that satisfy given filter conditions. default skips that filter + :param imgIds (int array) : get anns for given imgs + catIds (int array) : get anns for given cats + areaRng (float array) : get anns for given area range (e.g. [0 inf]) + iscrowd (boolean) : get anns for given crowd label (False or True) + :return: ids (int array) : integer array of ann ids + """ + imgIds = imgIds if _isArrayLike(imgIds) else [imgIds] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(imgIds) == len(catIds) == len(areaRng) == 0: + anns = self.dataset['annotations'] + else: + if not len(imgIds) == 0: + lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns] + anns = list(itertools.chain.from_iterable(lists)) + else: + anns = self.dataset['annotations'] + anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds] + anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]] + if not iscrowd == None: + ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd] + else: + ids = [ann['id'] for ann in anns] + return ids + + def getCatIds(self, catNms=[], supNms=[], catIds=[]): + """ + filtering parameters. default skips that filter. 
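        Example (illustrative; category names depend on the annotation file):
            cat_ids = coco.getCatIds(catNms=['person'])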
+ :param catNms (str array) : get cats for given cat names + :param supNms (str array) : get cats for given supercategory names + :param catIds (int array) : get cats for given cat ids + :return: ids (int array) : integer array of cat ids + """ + catNms = catNms if _isArrayLike(catNms) else [catNms] + supNms = supNms if _isArrayLike(supNms) else [supNms] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(catNms) == len(supNms) == len(catIds) == 0: + cats = self.dataset['categories'] + else: + cats = self.dataset['categories'] + cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms] + cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms] + cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds] + ids = [cat['id'] for cat in cats] + return ids + + def getImgIds(self, imgIds=[], catIds=[]): + ''' + Get img ids that satisfy given filter conditions. + :param imgIds (int array) : get imgs for given ids + :param catIds (int array) : get imgs with all given cats + :return: ids (int array) : integer array of img ids + ''' + imgIds = imgIds if _isArrayLike(imgIds) else [imgIds] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(imgIds) == len(catIds) == 0: + ids = self.imgs.keys() + else: + ids = set(imgIds) + for i, catId in enumerate(catIds): + if i == 0 and len(ids) == 0: + ids = set(self.catToImgs[catId]) + else: + ids &= set(self.catToImgs[catId]) + return list(ids) + + def loadAnns(self, ids=[]): + """ + Load anns with the specified ids. + :param ids (int array) : integer ids specifying anns + :return: anns (object array) : loaded ann objects + """ + if _isArrayLike(ids): + return [self.anns[id] for id in ids] + elif type(ids) == int: + return [self.anns[ids]] + + def loadCats(self, ids=[]): + """ + Load cats with the specified ids. + :param ids (int array) : integer ids specifying cats + :return: cats (object array) : loaded cat objects + """ + if _isArrayLike(ids): + return [self.cats[id] for id in ids] + elif type(ids) == int: + return [self.cats[ids]] + + def loadImgs(self, ids=[]): + """ + Load anns with the specified ids. + :param ids (int array) : integer ids specifying img + :return: imgs (object array) : loaded img objects + """ + if _isArrayLike(ids): + return [self.imgs[id] for id in ids] + elif type(ids) == int: + return [self.imgs[ids]] + + def showAnns(self, anns): + """ + Display the specified annotations. 
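        Example (illustrative): annotations are normally gathered with the filter
        helpers above before being displayed:
            cat_ids = coco.getCatIds(catNms=['dog'])
            img_ids = coco.getImgIds(catIds=cat_ids)
            anns = coco.loadAnns(coco.getAnnIds(imgIds=img_ids[:1], catIds=cat_ids))
            coco.showAnns(anns)
        Note that showAnns draws with matplotlib, whose imports are commented out at
        the top of this file and must be restored before calling it.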
+ :param anns (array of object): annotations to display + :return: None + """ + if len(anns) == 0: + return 0 + if 'segmentation' in anns[0] or 'keypoints' in anns[0]: + datasetType = 'instances' + elif 'caption' in anns[0]: + datasetType = 'captions' + else: + raise Exception('datasetType not supported') + if datasetType == 'instances': + ax = plt.gca() + ax.set_autoscale_on(False) + polygons = [] + color = [] + for ann in anns: + c = (np.random.random((1, 3))*0.6+0.4).tolist()[0] + if 'segmentation' in ann: + if type(ann['segmentation']) == list: + # polygon + for seg in ann['segmentation']: + poly = np.array(seg).reshape((int(len(seg)/2), 2)) + polygons.append(Polygon(poly)) + color.append(c) + else: + # mask + t = self.imgs[ann['image_id']] + if type(ann['segmentation']['counts']) == list: + rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width']) + else: + rle = [ann['segmentation']] + m = maskUtils.decode(rle) + img = np.ones( (m.shape[0], m.shape[1], 3) ) + if ann['iscrowd'] == 1: + color_mask = np.array([2.0,166.0,101.0])/255 + if ann['iscrowd'] == 0: + color_mask = np.random.random((1, 3)).tolist()[0] + for i in range(3): + img[:,:,i] = color_mask[i] + ax.imshow(np.dstack( (img, m*0.5) )) + if 'keypoints' in ann and type(ann['keypoints']) == list: + # turn skeleton into zero-based index + sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1 + kp = np.array(ann['keypoints']) + x = kp[0::3] + y = kp[1::3] + v = kp[2::3] + for sk in sks: + if np.all(v[sk]>0): + plt.plot(x[sk],y[sk], linewidth=3, color=c) + plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2) + plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2) + p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) + ax.add_collection(p) + p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) + ax.add_collection(p) + elif datasetType == 'captions': + for ann in anns: + print(ann['caption']) + + def loadRes(self, resFile): + """ + Load result file and return a result api object. 
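        Example (illustrative; 'detections.json' stands for a file in the standard COCO
        detection-result format, a list of {"image_id", "category_id",
        "bbox": [x, y, w, h], "score"} records such as the one written by
        tools/accuracy-coco.py below):
            coco_dt = coco.loadRes('detections.json')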
+ :param resFile (str) : file name of result file + :return: res (obj) : result api object + """ + res = COCO() + res.dataset['images'] = [img for img in self.dataset['images']] + + print('Loading and preparing results...') + tic = time.time() + if type(resFile) == str: #or type(resFile) == unicode: + anns = json.load(open(resFile)) + elif type(resFile) == np.ndarray: + anns = self.loadNumpyAnnotations(resFile) + else: + anns = resFile + assert type(anns) == list, 'results in not an array of objects' + annsImgIds = [ann['image_id'] for ann in anns] + assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ + 'Results do not correspond to current coco set' + if 'caption' in anns[0]: + imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) + res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] + for id, ann in enumerate(anns): + ann['id'] = id+1 + elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + bb = ann['bbox'] + x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]] + if not 'segmentation' in ann: + ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] + ann['area'] = bb[2]*bb[3] + ann['id'] = id+1 + ann['iscrowd'] = 0 + elif 'segmentation' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + # now only support compressed RLE format as segmentation results + ann['area'] = maskUtils.area(ann['segmentation']) + if not 'bbox' in ann: + ann['bbox'] = maskUtils.toBbox(ann['segmentation']) + ann['id'] = id+1 + ann['iscrowd'] = 0 + elif 'keypoints' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + s = ann['keypoints'] + x = s[0::3] + y = s[1::3] + x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y) + ann['area'] = (x1-x0)*(y1-y0) + ann['id'] = id + 1 + ann['bbox'] = [x0,y0,x1-x0,y1-y0] + print('DONE (t={:0.2f}s)'.format(time.time()- tic)) + + res.dataset['annotations'] = anns + res.createIndex() + return res + + def download(self, tarDir = None, imgIds = [] ): + ''' + Download COCO images from mscoco.org server. 
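        Example (illustrative; the directory name is a placeholder):
            coco.download(tarDir='coco_downloads', imgIds=coco.getImgIds()[:5])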
+ :param tarDir (str): COCO results directory name + imgIds (list): images to be downloaded + :return: + ''' + if tarDir is None: + print('Please specify target directory') + return -1 + if len(imgIds) == 0: + imgs = self.imgs.values() + else: + imgs = self.loadImgs(imgIds) + N = len(imgs) + if not os.path.exists(tarDir): + os.makedirs(tarDir) + for i, img in enumerate(imgs): + tic = time.time() + fname = os.path.join(tarDir, img['file_name']) + if not os.path.exists(fname): + urlretrieve(img['coco_url'], fname) + print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic)) + + def loadNumpyAnnotations(self, data): + """ + Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class} + :param data (numpy.ndarray) + :return: annotations (python nested list) + """ + print('Converting ndarray to lists...') + assert(type(data) == np.ndarray) + print(data.shape) + assert(data.shape[1] == 7) + N = data.shape[0] + ann = [] + for i in range(N): + if i % 1000000 == 0: + print('{}/{}'.format(i,N)) + ann += [{ + 'image_id' : int(data[i, 0]), + 'bbox' : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ], + 'score' : data[i, 5], + 'category_id': int(data[i, 6]), + }] + return ann + + def annToRLE(self, ann): + """ + Convert annotation which can be polygons, uncompressed RLE to RLE. + :return: binary mask (numpy 2D array) + """ + t = self.imgs[ann['image_id']] + h, w = t['height'], t['width'] + segm = ann['segmentation'] + if type(segm) == list: + # polygon -- a single object might consist of multiple parts + # we merge all parts into one mask rle code + rles = maskUtils.frPyObjects(segm, h, w) + rle = maskUtils.merge(rles) + elif type(segm['counts']) == list: + # uncompressed RLE + rle = maskUtils.frPyObjects(segm, h, w) + else: + # rle + rle = ann['segmentation'] + return rle + + def annToMask(self, ann): + """ + Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. + :return: binary mask (numpy 2D array) + """ + rle = self.annToRLE(ann) + m = maskUtils.decode(rle) + return m diff --git a/retired_benchmarks/vision/classification_and_detection/python/version.py b/retired_benchmarks/vision/classification_and_detection/python/version.py new file mode 100644 index 000000000..1152dbb41 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/python/version.py @@ -0,0 +1,3 @@ + +version = '0.1.0' +git_version = '05df3bae82ef9fc933277385eb778e3f22cd0c6a' diff --git a/retired_benchmarks/vision/classification_and_detection/run_and_time.sh b/retired_benchmarks/vision/classification_and_detection/run_and_time.sh new file mode 100755 index 000000000..50af82cb7 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/run_and_time.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +source run_common.sh + +dockercmd=docker +if [ $device == "gpu" ]; then + runtime="--runtime=nvidia" +fi + +# copy the config to cwd so the docker contrainer has access +cp ../../mlperf.conf . + +OUTPUT_DIR=`pwd`/output/$name +if [ ! -d $OUTPUT_DIR ]; then + mkdir -p $OUTPUT_DIR +fi + +image=mlperf-infer-imgclassify-$device +docker build -t $image -f Dockerfile.$device . 
+opts="--mlperf_conf ./mlperf.conf --profile $profile $common_opt --model $model_path \ + --dataset-path $DATA_DIR --output $OUTPUT_DIR $extra_args $EXTRA_OPS $@" + +docker run $runtime -e opts="$opts" \ + -v $DATA_DIR:$DATA_DIR -v $MODEL_DIR:$MODEL_DIR -v `pwd`:/mlperf \ + -v $OUTPUT_DIR:/output -v /proc:/host_proc \ + -t $image:latest /mlperf/run_helper.sh 2>&1 | tee $OUTPUT_DIR/output.txt diff --git a/retired_benchmarks/vision/classification_and_detection/run_common.sh b/retired_benchmarks/vision/classification_and_detection/run_common.sh new file mode 100755 index 000000000..6071019ab --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/run_common.sh @@ -0,0 +1,115 @@ +#!/bin/bash + +if [ $# -lt 1 ]; then + echo "usage: $0 tf|onnxruntime|pytorch|tflite [resnet50|mobilenet|ssd-mobilenet|ssd-resnet34] [cpu|gpu]" + exit 1 +fi +if [ "x$DATA_DIR" == "x" ]; then + echo "DATA_DIR not set" && exit 1 +fi +if [ "x$MODEL_DIR" == "x" ]; then + echo "MODEL_DIR not set" && exit 1 +fi + +# defaults +backend=tf +model=resnet50 +device="cpu" + +for i in $* ; do + case $i in + tf|onnxruntime|tflite|pytorch) backend=$i; shift;; + cpu|gpu) device=$i; shift;; + gpu) device=gpu; shift;; + resnet50|mobilenet|ssd-mobilenet|ssd-resnet34|ssd-resnet34-tf) model=$i; shift;; + esac +done + +if [ $device == "cpu" ] ; then + export CUDA_VISIBLE_DEVICES="" +fi + +name="$model-$backend" +extra_args="" + +# +# tensorflow +# +if [ $name == "resnet50-tf" ] ; then + model_path="$MODEL_DIR/resnet50_v1.pb" + profile=resnet50-tf +fi +if [ $name == "mobilenet-tf" ] ; then + model_path="$MODEL_DIR/mobilenet_v1_1.0_224_frozen.pb" + profile=mobilenet-tf +fi +if [ $name == "ssd-mobilenet-tf" ] ; then + model_path="$MODEL_DIR/ssd_mobilenet_v1_coco_2018_01_28.pb" + profile=ssd-mobilenet-tf +fi +if [ $name == "ssd-resnet34-tf" ] ; then + model_path="$MODEL_DIR/resnet34_tf.22.1.pb" + profile=ssd-resnet34-tf +fi + +# +# onnxruntime +# +if [ $name == "resnet50-onnxruntime" ] ; then + model_path="$MODEL_DIR/resnet50_v1.onnx" + profile=resnet50-onnxruntime +fi +if [ $name == "mobilenet-onnxruntime" ] ; then + model_path="$MODEL_DIR/mobilenet_v1_1.0_224.onnx" + profile=mobilenet-onnxruntime +fi +if [ $name == "ssd-mobilenet-onnxruntime" ] ; then + model_path="$MODEL_DIR/ssd_mobilenet_v1_coco_2018_01_28.onnx" + profile=ssd-mobilenet-onnxruntime +fi +if [ $name == "ssd-resnet34-onnxruntime" ] ; then + # use onnx model converted from pytorch + model_path="$MODEL_DIR/resnet34-ssd1200.onnx" + profile=ssd-resnet34-onnxruntime +fi +if [ $name == "ssd-resnet34-tf-onnxruntime" ] ; then + # use onnx model converted from tensorflow + model_path="$MODEL_DIR/ssd_resnet34_mAP_20.2.onnx" + profile=ssd-resnet34-onnxruntime-tf +fi + +# +# pytorch +# +if [ $name == "resnet50-pytorch" ] ; then + model_path="$MODEL_DIR/resnet50-19c8e357.pth" + profile=resnet50-pytorch + extra_args="$extra_args --backend pytorch" +fi +if [ $name == "mobilenet-pytorch" ] ; then + model_path="$MODEL_DIR/mobilenet_v1_1.0_224.onnx" + profile=mobilenet-onnxruntime + extra_args="$extra_args --backend pytorch" +fi +if [ $name == "ssd-resnet34-pytorch" ] ; then + model_path="$MODEL_DIR/resnet34-ssd1200.pytorch" + profile=ssd-resnet34-pytorch +fi + + +# +# tflite +# +if [ $name == "resnet50-tflite" ] ; then + model_path="$MODEL_DIR/resnet50_v1.tflite" + profile=resnet50-tf + extra_args="$extra_args --backend tflite" +fi +if [ $name == "mobilenet-tflite" ] ; then + model_path="$MODEL_DIR/mobilenet_v1_1.0_224.tflite" + profile=mobilenet-tf + extra_args="$extra_args 
--backend tflite" +fi + +name="$backend-$device/$model" +EXTRA_OPS="$extra_args $EXTRA_OPS" diff --git a/retired_benchmarks/vision/classification_and_detection/run_helper.sh b/retired_benchmarks/vision/classification_and_detection/run_helper.sh new file mode 100755 index 000000000..180486923 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/run_helper.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +echo "Clearing caches." +sync && echo 3 | tee /host_proc/sys/vm/drop_caches + + +cd /root + +common_opt="" + +start_fmt=$(date +%Y-%m-%d\ %r) +echo "STARTING RUN AT $start_fmt" + +cd /mlperf +python python/main.py $opts --output /output + +end_fmt=$(date +%Y-%m-%d\ %r) +echo "ENDING RUN AT $end_fmt" diff --git a/retired_benchmarks/vision/classification_and_detection/run_local.sh b/retired_benchmarks/vision/classification_and_detection/run_local.sh new file mode 100755 index 000000000..c014fc1a3 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/run_local.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +source ./run_common.sh + +common_opt="--mlperf_conf ../../mlperf.conf" +dataset="--dataset-path $DATA_DIR" +OUTPUT_DIR=`pwd`/output/$name +if [ ! -d $OUTPUT_DIR ]; then + mkdir -p $OUTPUT_DIR +fi + +python python/main.py --profile $profile $common_opt --model $model_path $dataset \ + --output $OUTPUT_DIR $EXTRA_OPS $@ diff --git a/retired_benchmarks/vision/classification_and_detection/setup.py b/retired_benchmarks/vision/classification_and_detection/setup.py new file mode 100644 index 000000000..c1e2fbcf0 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/setup.py @@ -0,0 +1,85 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT license. + +import distutils.command.build +import os +import subprocess +from collections import namedtuple +from textwrap import dedent + +import setuptools.command.build_py +import setuptools.command.develop +import setuptools.command.install +from setuptools import setup, find_packages, Command + +TOP_DIR = os.path.realpath(os.path.dirname(__file__)) +SRC_DIR = os.path.join(TOP_DIR, 'python') + +try: + git_version = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=TOP_DIR).decode('ascii').strip() +except (OSError, subprocess.CalledProcessError): + git_version = None + +with open(os.path.join(TOP_DIR, 'VERSION_NUMBER')) as version_file: + VersionInfo = namedtuple('VersionInfo', ['version', 'git_version'])( + version=version_file.read().strip(), + git_version=git_version + ) + + +class create_version(Command): + user_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + with open(os.path.join(SRC_DIR, 'version.py'), 'w') as f: + f.write(dedent(''' + version = '{version}' + git_version = '{git_version}' + '''.format(**dict(VersionInfo._asdict())))) + + +class build_py(setuptools.command.build_py.build_py): + def run(self): + self.run_command('create_version') + setuptools.command.build_py.build_py.run(self) + + +class build(distutils.command.build.build): + def run(self): + self.run_command('build_py') + + +class develop(setuptools.command.develop.develop): + def run(self): + self.run_command('create_version') + self.run_command('build') + setuptools.command.develop.develop.run(self) + + +cmdclass = { + 'create_version': create_version, + 'build_py': build_py, + 'build': build, + 'develop': develop, +} + +setup( + name="mlperf-inference", + version=VersionInfo.version, + description='mlperf inference benchmark', + 
setup_requires=['pytest-runner'], + tests_require=['graphviz', 'parameterized', 'pytest', 'pytest-cov', 'pyyaml'], + cmdclass=cmdclass, + packages=find_packages(), + author='guschmue@microsoft.com', + author_email='guschmue@microsoft.com', + url='https://github.com/mlperf/inference', + install_requires=['numpy>=1.14.1', 'onnx>=1.5', 'pybind11', 'Cython', + 'pycocotools', 'mlperf_loadgen', 'opencv-python-headless'] +) diff --git a/retired_benchmarks/vision/classification_and_detection/tools/accuracy-coco.py b/retired_benchmarks/vision/classification_and_detection/tools/accuracy-coco.py new file mode 100644 index 000000000..1e15999f1 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/accuracy-coco.py @@ -0,0 +1,125 @@ +""" +Tool to calculate accuracy for loadgen accuracy output found in mlperf_log_accuracy.json +We assume that loadgen's query index is in the same order as +the images in coco's annotations/instances_val2017.json. +""" + +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import json +import os + +import numpy as np + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval + +# pylint: disable=missing-docstring + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--mlperf-accuracy-file", required=True, help="path to mlperf_log_accuracy.json") + parser.add_argument("--coco-dir", required=True, help="coco directory") + parser.add_argument("--verbose", action="store_true", help="verbose messages") + parser.add_argument("--output-file", default="coco-results.json", help="path to output file") + parser.add_argument("--use-inv-map", action="store_true", help="use inverse label map") + parser.add_argument("--remove-48-empty-images", action="store_true", help="used in case you removed 48 empty images while preprocessing the dataset") + args = parser.parse_args() + return args + + +def main(): + args = get_args() + + cocoGt = COCO(os.path.join(args.coco_dir, "annotations/instances_val2017.json")) + + if args.use_inv_map: + inv_map = [0] + cocoGt.getCatIds() # First label in inv_map is not used + + with open(args.mlperf_accuracy_file, "r") as f: + results = json.load(f) + + detections = [] + image_ids = set() + seen = set() + no_results = 0 + if args.remove_48_empty_images: + im_ids = [] + for i in cocoGt.getCatIds(): + im_ids += cocoGt.catToImgs[i] + im_ids = list(set(im_ids)) + image_map = [cocoGt.imgs[id] for id in im_ids] + else: + image_map = cocoGt.dataset["images"] + + for j in results: + idx = j['qsl_idx'] + # de-dupe in case loadgen sends the same image multiple times + if idx in seen: + continue + seen.add(idx) + + # reconstruct from mlperf accuracy log + # what is written by the benchmark is an array of float32's: + # id, box[0], box[1], box[2], box[3], score, detection_class + # note that id is a index into instances_val2017.json, not the actual image_id + data = np.frombuffer(bytes.fromhex(j['data']), np.float32) + if len(data) < 7: + # handle images that had no results + image = image_map[idx] + # by adding the id to image_ids we make pycoco aware of the no-result image + image_ids.add(image["id"]) + no_results += 1 + if args.verbose: + print("no results: {}, idx={}".format(image["coco_url"], idx)) + continue + + for i in range(0, len(data), 7): + image_idx, ymin, xmin, ymax, xmax, score, label = data[i:i + 7] + image = image_map[idx] + image_idx = int(image_idx) + if image_idx != idx: + 
print("ERROR: loadgen({}) and payload({}) disagree on image_idx".format(idx, image_idx)) + image_id = image["id"] + height, width = image["height"], image["width"] + ymin *= height + xmin *= width + ymax *= height + xmax *= width + loc = os.path.join(args.coco_dir, "val2017", image["file_name"]) + label = int(label) + if args.use_inv_map: + label = inv_map[label] + # pycoco wants {imageID,x1,y1,w,h,score,class} + detections.append({ + "image_id": image_id, + "image_loc": loc, + "category_id": label, + "bbox": [float(xmin), float(ymin), float(xmax - xmin), float(ymax - ymin)], + "score": float(score)}) + image_ids.add(image_id) + + with open(args.output_file, "w") as fp: + json.dump(detections, fp, sort_keys=True, indent=4) + + cocoDt = cocoGt.loadRes(args.output_file) # Load from file to bypass error with Python3 + cocoEval = COCOeval(cocoGt, cocoDt, iouType='bbox') + cocoEval.params.imgIds = list(image_ids) + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + print("mAP={:.3f}%".format(100. * cocoEval.stats[0])) + if args.verbose: + print("found {} results".format(len(results))) + print("found {} images".format(len(image_ids))) + print("found {} images with no results".format(no_results)) + print("ignored {} dupes".format(len(results) - len(seen))) + + +if __name__ == "__main__": + main() diff --git a/retired_benchmarks/vision/classification_and_detection/tools/accuracy-imagenet.py b/retired_benchmarks/vision/classification_and_detection/tools/accuracy-imagenet.py new file mode 100644 index 000000000..1879e0f09 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/accuracy-imagenet.py @@ -0,0 +1,74 @@ +""" +Tool to calculate accuracy for loadgen accuracy output found in mlperf_log_accuracy.json +We assume that loadgen's query index is in the same order as the images in imagenet2012/val_map.txt. +""" + +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import json + +import numpy as np + + +# pylint: disable=missing-docstring + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--mlperf-accuracy-file", required=True, help="path to mlperf_log_accuracy.json") + parser.add_argument("--imagenet-val-file", required=True, help="path to imagenet val_map.txt") + parser.add_argument("--verbose", action="store_true", help="verbose messages") + parser.add_argument("--dtype", default="float32", choices=["float32", "int32", "int64"], help="data type of the label") + args = parser.parse_args() + return args + +dtype_map = { + "float32": np.float32, + "int32": np.int32, + "int64": np.int64 +} + +def main(): + args = get_args() + + imagenet = [] + with open(args.imagenet_val_file, "r") as f: + for line in f: + cols = line.strip().split() + imagenet.append((cols[0], int(cols[1]))) + + with open(args.mlperf_accuracy_file, "r") as f: + results = json.load(f) + + seen = set() + good = 0 + for j in results: + idx = j['qsl_idx'] + + # de-dupe in case loadgen sends the same image multiple times + if idx in seen: + continue + seen.add(idx) + + # get the expected label and image + img, label = imagenet[idx] + + # reconstruct label from mlperf accuracy log + data = np.frombuffer(bytes.fromhex(j['data']), dtype_map[args.dtype]) + found = int(data[0]) + if label == found: + good += 1 + else: + if args.verbose: + print("{}, expected: {}, found {}".format(img, label, found)) + + print("accuracy={:.3f}%, good={}, total={}".format(100. 
* good / len(seen), good, len(seen))) + if args.verbose: + print("found and ignored {} dupes".format(len(results) - len(seen))) + + +if __name__ == "__main__": + main() diff --git a/retired_benchmarks/vision/classification_and_detection/tools/calibrate_torchvision_model.py b/retired_benchmarks/vision/classification_and_detection/tools/calibrate_torchvision_model.py new file mode 100644 index 000000000..815e5fe20 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/calibrate_torchvision_model.py @@ -0,0 +1,77 @@ +import argparse +import os +from tqdm import tqdm +from PIL import Image + +import torch +from torch.utils.data import DataLoader, Dataset + +import torchvision.transforms as transforms +from torchvision.models.quantization import * + + +class CalibrationDataset(Dataset): + def __init__(self, root, files, transform): + with open(files, 'r') as f: + self.files = [os.path.join(root, fn.strip()) for fn in f.readlines()] + self.transform = transform + + def __getitem__(self, idx): + image = Image.open(self.files[idx]).convert('RGB') + image = self.transform(image) + return image + + def __len__(self): + return len(self.files) + + +def quantize_model(model, dataloader, backend='fbgemm'): + if backend not in torch.backends.quantized.supported_engines: + raise RuntimeError("Quantized backend not supported ") + torch.backends.quantized.engine = backend + model.cpu() + model.eval() + model.fuse_model() + + # Make sure that weight qconfig matches that of the serialized models + model.qconfig = torch.quantization.get_default_qconfig(backend) + torch.quantization.prepare(model, inplace=True) + print('calibrating...') + for x in tqdm(dataloader): + model(x) + print('calibration DONE!') + torch.quantization.convert(model, inplace=True) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--model', type=str, default='resnet50') + parser.add_argument('--image-dir', type=str, default='imagenet/val') + parser.add_argument('--image-list', type=str, default='../../calibration/ImageNet/cal_image_list_option_1.txt') + args = parser.parse_args() + print(args) + + transform = transforms.Compose([ + transforms.Resize(256), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ]) + + dataset = CalibrationDataset(root=args.image_dir, files=args.image_list, transform=transform) + dataloader = DataLoader(dataset, batch_size=1) + + model = eval(args.model)(pretrained=True, progress=True, quantize=False) + quantize_model(model, dataloader) + print(model) + + inp = torch.rand(1, 3, 224, 224) + script_module = torch.jit.trace(model, inp) + save_path = f'{args.model}.pt' + torch.jit.save(script_module, save_path) + print(f'saved: {save_path}') + + +if __name__=='__main__': + main() + diff --git a/retired_benchmarks/vision/classification_and_detection/tools/ci-run.sh b/retired_benchmarks/vision/classification_and_detection/tools/ci-run.sh new file mode 100755 index 000000000..ad8bda3f0 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/ci-run.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +# where to find stuff +export DATA_ROOT=`pwd`/data +export MODEL_DIR=`pwd`/models + +export DATA_ROOT=/data +export MODEL_DIR=$HOME/resnet_for_mlperf + + +# options for official runs +gopt="--max-batchsize 32 --samples-per-query 2 --threads 2" +gopt="$gopt $@" + +result=output/results.csv + +function one_run { + # args: mode framework device model ... 
+ scenario=$1; shift + model=$3 + system_desc=$1-$2 + case $model in + "mobilenet") + official_model="mobilenet" + acc_cmd="tools/accuracy-imagenet.py --imagenet-val-file $DATA_ROOT/imagenet2012/val_map.txt";; + "resnet50") + official_model="resnet" + acc_cmd="tools/accuracy-imagenet.py --imagenet-val-file $DATA_ROOT/imagenet2012/val_map.txt";; + "ssd-mobilenet") + official_model="ssd-small" + acc_cmd="tools/accuracy-coco.py --coco-dir $DATA_ROOT/coco";; + "ssd-resnet34") + official_model="ssd-large" + acc_cmd="tools/accuracy-coco.py --use-inv-map --coco-dir $DATA_ROOT/coco";; + "gnmt") + official_model="gnmt";; + esac + echo "====== $official_model/$scenario =====" + output_dir=output/$system_desc/$official_model/$scenario + + # accuracy run + ./run_local.sh $@ --scenario $scenario --accuracy --output $output_dir/accuracy + python $acc_cmd --verbose --mlperf-accuracy-file $output_dir/accuracy/mlperf_log_accuracy.json \ + > $output_dir/accuracy/accuracy.txt + cat $output_dir/accuracy/accuracy.txt + + # performance run + ./run_local.sh $@ --scenario $scenario --output $output_dir/performance + + # summary to csv + python tools/lglog2csv.py --input $output_dir/performance/mlperf_log_summary.txt --runtime "$1-$2" \ + --machine $HOSTNAME --model $3 --name $1-$2-py >> $result +} + +function one_model { + # args: framework device model ... + one_run SingleStream $@ + one_run MultiStream $@ + one_run Server $@ + one_run Offline $@ +} + + +mkdir output +echo "build,date,machine,runtime,model,mode,qps,mean,latency_90,latency_99" > $result + +# TODO: add gnmt + +# using imagenet +export DATA_DIR=$DATA_ROOT/imagenet2012 +#one_model onnxruntime cpu mobilenet $gopt +#one_model tf gpu resnet50 $gopt + +# using coco +export DATA_DIR=$DATA_ROOT/coco +#one_model tf gpu ssd-mobilenet $gopt +one_model tf gpu ssd-resnet34 $gopt +#one_model onnxruntime cpu ssd-resnet34 $gopt diff --git a/retired_benchmarks/vision/classification_and_detection/tools/coco-analyze.py b/retired_benchmarks/vision/classification_and_detection/tools/coco-analyze.py new file mode 100755 index 000000000..7f22daa84 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/coco-analyze.py @@ -0,0 +1,134 @@ +""" +coco result analyzer +""" + +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import collections +import json +import os +import time +import numpy as np + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from PIL import Image, ImageFont, ImageDraw, ImageColor + + +# pylint: disable=missing-docstring + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--output", required=True, help="output directory") + parser.add_argument("--input", required=True, help="input json") + parser.add_argument("--coco", help="coco dataset root") + args = parser.parse_args() + return args + + +def annotate_image(results, cocoGt, output): + os.makedirs(output, exist_ok=True) + + new_results = collections.defaultdict(list) + for result in results: + new_results[result['image_id']].append(result) + print("Unique images = {}".format(len(new_results))) + results = new_results + + for k, result in results.items(): + draw = None + image = None + for v in result: + box = v['bbox'] + score = v['score'] + predicted_class = v["category_id"] + try: + predicted_class = cocoGt.loadCats(predicted_class)[0]["name"] + except Exception as ex: + print("category {} not found, image 
{}".format(predicted_class, v["image_loc"])) + # predicted_class = self.class_names[c] + # "image_loc": "/home/gs/data/coco300/val2017/000000397133.jpg", + if not draw: + image = Image.open(v['image_loc']) + if image.mode != 'RGB': + image = image.convert('RGB') + + draw = ImageDraw.Draw(image) + # font = ImageFont.truetype(font='FreeMono.ttf', + # size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) + try: + left, top, w, h = box + bottom = top + h + right = left + w + top = max(0, np.floor(top + 0.5).astype('int32')) + left = max(0, np.floor(left + 0.5).astype('int32')) + bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) + right = min(image.size[0], np.floor(right + 0.5).astype('int32')) + label = '{} {:.2f}'.format(predicted_class, score) + # label_size = draw.textsize(label, font) + label_size = draw.textsize(label) + + if top - label_size[1] >= 0: + text_origin = tuple(np.array([left, top - label_size[1]])) + else: + text_origin = tuple(np.array([left, top + 1])) + + color = ImageColor.getrgb("red") + thickness = 0 + draw.rectangle([left + thickness, top + thickness, right - thickness, bottom - thickness], outline=color) + draw.text(text_origin, label, fill=color) # , font=font) + except Exception as ex: + print("{} failed, ex {}".format(v['image_loc'], ex)) + image.save(os.path.join(output, os.path.basename(v['image_loc']))) + del draw + + +def calculate_map(results, cocoGt, output): + # bbox is expected as: + # x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] + + cocoDt = cocoGt.loadRes(results) + cocoEval = COCOeval(cocoGt, cocoDt, iouType='bbox') + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + all_metrics = { + "DetectionBoxes_Precision/mAP": cocoEval.stats[0], + "DetectionBoxes_Precision/mAP@.50IOU": cocoEval.stats[1], + "DetectionBoxes_Precision/mAP@.75IOU": cocoEval.stats[2], + "DetectionBoxes_Precision/mAP (small)": cocoEval.stats[3], + "DetectionBoxes_Precision/mAP (medium)": cocoEval.stats[4], + "DetectionBoxes_Precision/mAP (large)": cocoEval.stats[5], + "DetectionBoxes_Recall/AR@1": cocoEval.stats[6], + "DetectionBoxes_Recall/AR@10": cocoEval.stats[7], + "DetectionBoxes_Recall/AR@100": cocoEval.stats[8], + "DetectionBoxes_Recall/AR@100 (small)": cocoEval.stats[9], + "DetectionBoxes_Recall/AR@100 (medium)": cocoEval.stats[10], + "DetectionBoxes_Recall/AR@100 (large)": cocoEval.stats[11] + } + + mAP = all_metrics['DetectionBoxes_Precision/mAP'] + recall = all_metrics['DetectionBoxes_Recall/AR@100'] + print("mAP={}, recall={}".format(mAP, recall)) + + +def main(): + args = get_args() + + with open(args.input, "r") as f: + results = json.load(f) + + annotation_file = os.path.join(args.coco, "annotations/instances_val2017.json") + cocoGt = COCO(annotation_file) + annotate_image(results, cocoGt, args.output) + calculate_map(args.input, cocoGt, args.output) + + +if __name__ == "__main__": + main() diff --git a/retired_benchmarks/vision/classification_and_detection/tools/convert-to-onnx.sh b/retired_benchmarks/vision/classification_and_detection/tools/convert-to-onnx.sh new file mode 100755 index 000000000..c2b0ae907 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/convert-to-onnx.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# convert all mlperf inference models to onnx using +# tf2onnx (https://github.com/onnx/tensorflow-onnx). +# We assume tf2onnx is already installed (pip install -U tfonnx) +# +# by default we use opset 8 but if your runtime supports it opset 10 is a better choice. 
+ +export CUDA_VISIBLE_DEVICES="" + +if [ $# -lt 1 ]; then + echo "Usage: convert.sh model_dir ..." + exit 1 +fi + +model_dir=$1 +shift + +opts="$@" + +# +# resnet50 +# +python -m tf2onnx.convert --input $model_dir/resnet50_v1.pb --output $model_dir/resnet50_v1.onnx \ + --inputs-as-nchw input_tensor:0 \ + --inputs input_tensor:0 \ + --outputs ArgMax:0,softmax_tensor:0 --opset 8 $opts + +# +# mobilenet +# +python -m tf2onnx.convert --input $model_dir/mobilenet_v1_1.0_224_frozen.pb --output $model_dir/mobilenet_v1_1.0_224.onnx \ + --inputs-as-nchw input:0 \ + --inputs input:0 \ + --outputs MobilenetV1/Predictions/Reshape_1:0 --opset 8 $opts + +# +# ssd_mobilenet_v1_coco +# +python -m tf2onnx.convert --input $model_dir/ssd_mobilenet_v1_coco_2018_01_28.pb --output $model_dir/ssd_mobilenet_v1_coco_2018_01_28.onnx \ + --inputs image_tensor:0 \ + --outputs num_detections:0,detection_boxes:0,detection_scores:0,detection_classes:0 \ + --opset 10 $opts + +# +# ssd_resnet34 (from tensorflow) +# +python -m tf2onnx.convert --input $model_dir/resnet34_tf.22.1.pb --output $model_dir/ssd_resnet34_mAP_20.2.onnx \ + --inputs image:0 --outputs detection_bboxes:0,detection_scores:0,detection_classes:0 \ + --opset 10 $opts diff --git a/retired_benchmarks/vision/classification_and_detection/tools/lglog2csv.py b/retired_benchmarks/vision/classification_and_detection/tools/lglog2csv.py new file mode 100644 index 000000000..901bd795d --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/lglog2csv.py @@ -0,0 +1,69 @@ +""" +collect mlperf loadgen output to csv +""" + +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import argparse +import re +import time + + +# pylint: disable=missing-docstring + + +def get_args(): + """Parse commandline.""" + parser = argparse.ArgumentParser() + parser.add_argument("--input", required=True, help="input json") + parser.add_argument("--runtime", required=True, help="runtime") + parser.add_argument("--machine", required=True, help="machine") + parser.add_argument("--model", required=True, help="model") + parser.add_argument("--name", required=True, help="name") + args = parser.parse_args() + return args + + +def main(): + args = get_args() + + # print("name,date,machine,runtime,model,mode,qps,mean,latency_90,latency_99") + + now = int(time.time()) + + with open(args.input, "r") as fp: + mode, mean, latency_90, latency_99, qps = None, 0, 0, 0, 0 + for line in fp: + m = re.match("^Scenario\s*:\s*(\w+).*", line) + if m: + mode = m.group(1) + m = re.match("^90.00 percentile latency.*:\s*(\d+).*", line) + if m: + latency_90 = m.group(1) + m = re.match("^99.00 percentile latency.*:\s*(\d+).*", line) + if m: + latency_99 = m.group(1) + m = re.match("^Mean latency.*:\s*(\d+).*", line) + if m: + mean = m.group(1) + m = re.match("^Completed samples per second.*:\s*(\d+).*", line) + if m: + qps = m.group(1) + m = re.match("^QPS w/ loadgen overhead.*:\s*(\d+).*", line) + if m: + qps = m.group(1) + m = re.match("^Samples per second.*:\s*(\d+).*", line) + if m: + qps = m.group(1) + m = re.match("Test Parameters Used.*", line) + if m: + print("{},{},{},{},{},{},{},{},{},{}".format( + args.name, now, args.machine, args.runtime, args.model, + mode, qps, mean, latency_90, latency_99)) + mode, mean, latency_90, latency_99, qps = None, 0, 0, 0, 0 + + +if __name__ == "__main__": + main() diff --git a/retired_benchmarks/vision/classification_and_detection/tools/make_fake_imagenet.sh 
b/retired_benchmarks/vision/classification_and_detection/tools/make_fake_imagenet.sh new file mode 100755 index 000000000..d1c821929 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/make_fake_imagenet.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +opt=-q + +mkdir -p fake_imagenet/val +cd fake_imagenet/val +wget $opt https://upload.wikimedia.org/wikipedia/commons/thumb/5/57/7weeks_old.JPG/800px-7weeks_old.JPG +wget $opt https://upload.wikimedia.org/wikipedia/commons/thumb/1/15/Welsh_Springer_Spaniel.jpg/800px-Welsh_Springer_Spaniel.jpg +wget $opt https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/Jammlich_crop.jpg/800px-Jammlich_crop.jpg +wget $opt https://upload.wikimedia.org/wikipedia/commons/thumb/1/15/Pumiforme.JPG/782px-Pumiforme.JPG +wget $opt https://upload.wikimedia.org/wikipedia/commons/thumb/e/e1/Sardinian_Warbler.jpg/800px-Sardinian_Warbler.jpg +wget $opt https://upload.wikimedia.org/wikipedia/commons/thumb/b/b8/Cacatua_moluccensis_-Cincinnati_Zoo-8a.jpg/512px-Cacatua_moluccensis_-Cincinnati_Zoo-8a.jpg +wget $opt https://upload.wikimedia.org/wikipedia/commons/thumb/9/9f/20180630_Tesla_Model_S_70D_2015_midnight_blue_left_front.jpg/800px-20180630_Tesla_Model_S_70D_2015_midnight_blue_left_front.jpg +wget $opt https://upload.wikimedia.org/wikipedia/commons/thumb/b/b8/Porsche_991_silver_IAA.jpg/800px-Porsche_991_silver_IAA.jpg +cd .. + +cat > val_map.txt < /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "We use tensorflow-onnx to convert tensorflow to onnx." + echo "See https://github.com/onnx/tensorflow-onnx for details." + echo "Install with:" + echo "pip install tf2onnx" + echo "or" + echo "pip install https://github.com/onnx/tensorflow-onnx" + exit 1 +fi + +model=resnet50_v1 +tfmodel=mobilenet_v1_1.0_224_frozen.pb +onnxmodel=mobilenet_v1_1.0_224.onnx +url=http://download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224.tgz +tgz=$(basename $url) + +if [ ! -r $tgz ]; then + wget -q $url + tar zxvf $tgz +fi +python -m tf2onnx.convert --input $tfmodel --output $onnxmodel \ + --fold_const --opset 8 --verbose \ + --inputs-as-nchw input:0 \ + --inputs input:0 \ + --outputs MobilenetV1/Predictions/Reshape_1:0 diff --git a/retired_benchmarks/vision/classification_and_detection/tools/pylintrc b/retired_benchmarks/vision/classification_and_detection/tools/pylintrc new file mode 100755 index 000000000..955e35304 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/pylintrc @@ -0,0 +1,320 @@ +# taken from tensorflow, but indent=4 +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Profiled execution. +profile=no + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + + +[MESSAGES CONTROL] + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time. See also the "--disable" option for examples. +enable=indexing-exception,old-raise-syntax + +# Disable the message, report, category or checker with the given id(s). 
You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +disable=design,similarities,no-self-use,attribute-defined-outside-init,locally-disabled,star-args,pointless-except,bad-option-value,global-statement,fixme,suppressed-message,useless-suppression,locally-enabled,no-member,no-name-in-module,import-error,unsubscriptable-object,unbalanced-tuple-unpacking,undefined-variable,not-context-manager,useless-object-inheritance + + +# Set the cache size for astng objects. +cache-size=500 + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". +files-output=no + +# Tells whether to display a full report or only the messages +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Add a comment according to your evaluation note. This is used by the global +# evaluation report (RP0004). +comment=no + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template= + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of classes names for which member attributes should not be checked +# (useful for classes with attributes dynamically set). +ignored-classes=SQLObject + +# When zope mode is activated, add a predefined set of Zope acquired attributes +# to generated-members. +zope=no + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E0201 when accessed. Python regular +# expressions are accepted. +generated-members=REQUEST,acl_users,aq_parent + +# List of decorators that create context managers from functions, such as +# contextlib.contextmanager. +contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the beginning of the name of dummy variables +# (i.e. not used). +dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_) + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. 
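+# (comma-separated list of names; left empty here)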
+additional-builtins= + + +[BASIC] + +# Required attributes for module, separated by a comma +required-attributes= + +# List of builtins function names that should not be used, separated by a comma +bad-functions=apply,input,reduce + + +# Disable the report(s) with the given id(s). +# All non-Google reports are disabled by default. +disable-report=R0001,R0002,R0003,R0004,R0101,R0102,R0201,R0202,R0220,R0401,R0402,R0701,R0801,R0901,R0902,R0903,R0904,R0911,R0912,R0913,R0914,R0915,R0921,R0922,R0923 + +# Regular expression which should only match correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression which should only match correct module level names +const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ + +# Regular expression which should only match correct class names +class-rgx=^_?[A-Z][a-zA-Z0-9]*$ + +# Regular expression which should only match correct function names +function-rgx=^(?:(?P_?[A-Z][a-zA-Z0-9]*)|(?P_?[a-z][a-z0-9_]*))$ + +# Regular expression which should only match correct method names +method-rgx=^(?:(?P__[a-z0-9_]+__|next)|(?P_{0,2}[A-Z][a-zA-Z0-9]*)|(?P_{0,2}[a-z][a-z0-9_]*))$ + +# Regular expression which should only match correct instance attribute names +attr-rgx=^_{0,2}[a-z][a-z0-9_]*$ + +# Regular expression which should only match correct argument names +argument-rgx=^[a-z][a-z0-9_]*$ + +# Regular expression which should only match correct variable names +variable-rgx=^[a-z][a-z0-9_]*$ + +# Regular expression which should only match correct attribute names in class +# bodies +class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ + +# Regular expression which should only match correct list comprehension / +# generator expression variable names +inlinevar-rgx=^[a-z][a-z0-9_]*$ + +# Good variable names which should always be accepted, separated by a comma +good-names=main,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=(__.*__|main) + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=10 + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=120 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=y + +# List of optional constructs for which whitespace checking is disabled +no-space-check= + +# Maximum number of lines in a module +max-module-lines=99999 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes= + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,TERMIOS,Bastion,rexec,sets + +# Create a graph of every (i.e. 
internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + + +[CLASSES] + +# List of interface methods to ignore, separated by a comma. This is used for +# instance to not check methods defines in Zope's Interface base class. +ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls,class_ + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=5 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=15 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branches=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=Exception,StandardError,BaseException + + +[AST] + +# Maximum line length for lambdas +short-func-length=1 + +# List of module members that should be marked as deprecated. +# All of the string functions are listed in 4.1.4 Deprecated string functions +# in the Python 2.4 docs. +deprecated-members=string.atof,string.atoi,string.atol,string.capitalize,string.expandtabs,string.find,string.rfind,string.index,string.rindex,string.count,string.lower,string.split,string.rsplit,string.splitfields,string.join,string.joinfields,string.lstrip,string.rstrip,string.strip,string.swapcase,string.translate,string.upper,string.ljust,string.rjust,string.center,string.zfill,string.replace,sys.exitfunc + + +[DOCSTRING] + +# List of exceptions that do not need to be mentioned in the Raises section of +# a docstring. +ignore-exceptions=AssertionError,NotImplementedError,StopIteration,TypeError + + + +[TOKENS] + +# Number of spaces of indent required when the last token on the preceding line +# is an open (, [, or {. 
+indent-after-paren=4 diff --git a/retired_benchmarks/vision/classification_and_detection/tools/resnet50-to-tflite.sh b/retired_benchmarks/vision/classification_and_detection/tools/resnet50-to-tflite.sh new file mode 100755 index 000000000..39388fc1a --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/resnet50-to-tflite.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# convert resnet50 to tflite + + +model=resnet50_v1 +tfmodel="$model.pb" +url=https://zenodo.org/record/2535873/files/$tfmodel + +if [ ! -r $local ]; then + wget -o $local -q $url +fi + +tflite_convert --graph_def_file $tfmodel --output_file $model.tflite \ + --input_arrays input_tensor \ + --output_arrays ArgMax,softmax_tensor + +tflite_convert --graph_def_file $tfmodel --output_file $model"_quant.tflite" \ + --input_arrays input_tensor \ + --output_arrays ArgMax \ + --inference_type QUANTIZED_UINT8 --inference_input_type QUANTIZED_UINT8 \ + --input_shape=1,224,224,3 \ + --mean_values=128 \ + --std_dev_values=128 \ + --default_ranges_min=0 \ + --default_ranges_max=6 diff --git a/retired_benchmarks/vision/classification_and_detection/tools/resnet_save.py b/retired_benchmarks/vision/classification_and_detection/tools/resnet_save.py new file mode 100755 index 000000000..fca66ea26 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/resnet_save.py @@ -0,0 +1,298 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains utility and supporting functions for ResNet. + + This module contains ResNet code which does not directly build layers. This +includes dataset management, hyperparameter and optimizer code, and argument +parsing. Code for defining the ResNet layers can be found in resnet_model.py. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import math +import multiprocessing +import os + +# pylint: disable=g-bad-import-order +from absl import flags +import tensorflow as tf +from tensorflow.contrib.data.python.ops import threadpool + +from official.resnet import resnet_model +from official.utils.flags import core as flags_core +from official.utils.export import export +from official.utils.logs import hooks_helper +from official.utils.logs import logger +from official.resnet import imagenet_preprocessing +from official.utils.misc import distribution_utils +from official.utils.misc import model_helpers + + + + +def image_bytes_serving_input_fn(image_shape, dtype=tf.float32): + """Serving input fn for raw jpeg images.""" + + def _preprocess_image(image_bytes): + """Preprocess a single raw image.""" + # Bounding box around the whole image. 
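+    # (normalized [ymin, xmin, ymax, xmax] = [0, 0, 1, 1], i.e. the whole frame)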
+ bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=dtype, shape=[1, 1, 4]) + height, width, num_channels = image_shape + image = imagenet_preprocessing.preprocess_image( + image_bytes, bbox, height, width, num_channels, is_training=False) + return image + + image_bytes_list = tf.placeholder( + shape=[None], dtype=tf.string, name='input_tensor') + images = tf.map_fn( + _preprocess_image, image_bytes_list, back_prop=False, dtype=dtype) + return tf.estimator.export.TensorServingInputReceiver( + images, {'image_bytes': image_bytes_list}) + + + + +def resnet_model_fn(features, labels, mode, model_class, + resnet_size, weight_decay, learning_rate_fn, momentum, + data_format, resnet_version, loss_scale, + loss_filter_fn=None, dtype=resnet_model.DEFAULT_DTYPE, + fine_tune=False): + """Shared functionality for different resnet model_fns. + + Initializes the ResnetModel representing the model layers + and uses that model to build the necessary EstimatorSpecs for + the `mode` in question. For training, this means building losses, + the optimizer, and the train op that get passed into the EstimatorSpec. + For evaluation and prediction, the EstimatorSpec is returned without + a train op, but with the necessary parameters for the given mode. + + Args: + features: tensor representing input images + labels: tensor representing class labels for all input images + mode: current estimator mode; should be one of + `tf.estimator.ModeKeys.TRAIN`, `EVALUATE`, `PREDICT` + model_class: a class representing a TensorFlow model that has a __call__ + function. We assume here that this is a subclass of ResnetModel. + resnet_size: A single integer for the size of the ResNet model. + weight_decay: weight decay loss rate used to regularize learned variables. + learning_rate_fn: function that returns the current learning rate given + the current global_step + momentum: momentum term used for optimization + data_format: Input format ('channels_last', 'channels_first', or None). + If set to None, the format is dependent on whether a GPU is available. + resnet_version: Integer representing which version of the ResNet network to + use. See README for details. Valid values: [1, 2] + loss_scale: The factor to scale the loss for numerical stability. A detailed + summary is present in the arg parser help text. + loss_filter_fn: function that takes a string variable name and returns + True if the var should be included in loss calculation, and False + otherwise. If None, batch_normalization variables will be excluded + from the loss. + dtype: the TensorFlow dtype to use for calculations. + fine_tune: If True only train the dense layers(final layers). + + Returns: + EstimatorSpec parameterized according to the input params and the + current mode. + """ + + model = model_class(resnet_size, data_format, resnet_version=resnet_version, + dtype=dtype) + + logits = model(features, mode == tf.estimator.ModeKeys.TRAIN) + + # This acts as a no-op if the logits are already in fp32 (provided logits are + # not a SparseTensor). If dtype is is low precision, logits must be cast to + # fp32 for numerical stability. 
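+  # (e.g. float16 logits from a mixed-precision run are widened to float32 here)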
+ logits = tf.cast(logits, tf.float32) + + predictions = { + 'classes': tf.argmax(logits, axis=1), + 'probabilities': tf.nn.softmax(logits, name='softmax_tensor') + } + + if mode == tf.estimator.ModeKeys.PREDICT: + # Return the predictions and the specification for serving a SavedModel + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=predictions, + export_outputs={ + 'predict': tf.estimator.export.PredictOutput(predictions) + }) + + # Calculate loss, which includes softmax cross entropy and L2 regularization. + cross_entropy = tf.losses.sparse_softmax_cross_entropy( + logits=logits, labels=labels) + + # Create a tensor named cross_entropy for logging purposes. + tf.identity(cross_entropy, name='cross_entropy') + + # If no loss_filter_fn is passed, assume we want the default behavior, + # which is that batch_normalization variables are excluded from loss. + def exclude_batch_norm(name): + return 'batch_normalization' not in name + loss_filter_fn = loss_filter_fn or exclude_batch_norm + + # Add weight decay to the loss. + l2_loss = weight_decay * tf.add_n( + # loss is computed using fp32 for numerical stability. + [tf.nn.l2_loss(tf.cast(v, tf.float32)) for v in tf.trainable_variables() + if loss_filter_fn(v.name)]) + tf.summary.scalar('l2_loss', l2_loss) + loss = cross_entropy + l2_loss + + return tf.estimator.EstimatorSpec( + mode=mode, + predictions=predictions, + loss=loss, + train_op=train_op, + eval_metric_ops=metrics) + + +def resnet_main( + flags_obj, model_function, input_function, dataset_name, shape=None): + """Shared main loop for ResNet Models. + + Args: + flags_obj: An object containing parsed flags. See define_resnet_flags() + for details. + model_function: the function that instantiates the Model and builds the + ops for train/eval. This will be passed directly into the estimator. + input_function: the function that processes the dataset and returns a + dataset that the estimator can train on. This will be wrapped with + all the relevant flags for running and passed to estimator. + dataset_name: the name of the dataset for training and evaluation. This is + used for logging purpose. + shape: list of ints representing the shape of the images used for training. + This is only used if flags_obj.export_dir is passed. + """ + + print("RESNET MAIN") + model_helpers.apply_clean(flags.FLAGS) + + # Ensures flag override logic is only executed if explicitly triggered. + if flags_obj.tf_gpu_thread_mode: + override_flags_and_set_envars_for_gpu_thread_pool(flags_obj) + + # Creates session config. allow_soft_placement = True, is required for + # multi-GPU and is not harmful for other modes. + session_config = tf.ConfigProto(allow_soft_placement=True) + + run_config = tf.estimator.RunConfig( + session_config=session_config, + save_checkpoints_secs=60*60*24) + + # Initializes model with all but the dense layer from pretrained ResNet. 
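+  # ('^(?!.*dense)' below is a negative lookahead: warm-start every variable
+  # whose name does not contain "dense", so the final dense layer keeps its
+  # fresh initialization for fine-tuning.)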
+ if flags_obj.pretrained_model_checkpoint_path is not None: + warm_start_settings = tf.estimator.WarmStartSettings( + flags_obj.pretrained_model_checkpoint_path, + vars_to_warm_start='^(?!.*dense)') + else: + warm_start_settings = None + + classifier = tf.estimator.Estimator( + model_fn=model_function, model_dir=flags_obj.model_dir, config=run_config, + warm_start_from=warm_start_settings, params={ + 'resnet_size': int(flags_obj.resnet_size), + 'data_format': flags_obj.data_format, + 'batch_size': flags_obj.batch_size, + 'resnet_version': int(flags_obj.resnet_version), + 'loss_scale': flags_core.get_loss_scale(flags_obj), + 'dtype': flags_core.get_tf_dtype(flags_obj), + 'fine_tune': flags_obj.fine_tune + }) + + run_params = { + 'batch_size': flags_obj.batch_size, + 'dtype': flags_core.get_tf_dtype(flags_obj), + 'resnet_size': flags_obj.resnet_size, + 'resnet_version': flags_obj.resnet_version, + 'synthetic_data': flags_obj.use_synthetic_data, + 'train_epochs': flags_obj.train_epochs, + } + + def input_fn_eval(): + return input_function( + is_training=False, + data_dir=flags_obj.data_dir, + batch_size=distribution_utils.per_device_batch_size( + flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)), + num_epochs=1, + dtype=flags_core.get_tf_dtype(flags_obj)) + + schedule, n_loops = [0], 1 + if flags_obj.export_dir is not None: + # Exports a saved model for the given classifier. + export_dtype = flags_core.get_tf_dtype(flags_obj) + if flags_obj.image_bytes_as_serving_input: + input_receiver_fn = functools.partial( + image_bytes_serving_input_fn, shape, dtype=export_dtype) + else: + input_receiver_fn = export.build_tensor_serving_input_receiver_fn( + shape, batch_size=flags_obj.batch_size, dtype=export_dtype) + classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn, + strip_default_attrs=True) + + +def define_resnet_flags(resnet_size_choices=None): + """Add flags and validators for ResNet.""" + flags_core.define_base() + flags_core.define_performance(num_parallel_calls=False, + tf_gpu_thread_mode=True, + datasets_num_private_threads=True, + datasets_num_parallel_batches=True) + flags_core.define_image() + flags_core.define_benchmark() + flags.adopt_module_key_flags(flags_core) + + flags.DEFINE_enum( + name='resnet_version', short_name='rv', default='1', + enum_values=['1', '2'], + help=flags_core.help_wrap( + 'Version of ResNet. (1 or 2) See README.md for details.')) + flags.DEFINE_bool( + name='fine_tune', short_name='ft', default=False, + help=flags_core.help_wrap( + 'If True do not train any parameters except for the final layer.')) + flags.DEFINE_string( + name='pretrained_model_checkpoint_path', short_name='pmcp', default=None, + help=flags_core.help_wrap( + 'If not None initialize all the network except the final layer with ' + 'these values')) + flags.DEFINE_boolean( + name='eval_only', default=False, + help=flags_core.help_wrap('Skip training and only perform evaluation on ' + 'the latest checkpoint.')) + flags.DEFINE_boolean( + name='image_bytes_as_serving_input', default=False, + help=flags_core.help_wrap( + 'If True exports savedmodel with serving signature that accepts ' + 'JPEG image bytes instead of a fixed size [HxWxC] tensor that ' + 'represents the image. The former is easier to use for serving at ' + 'the expense of image resize/cropping being done as part of model ' + 'inference. 
Note, this flag only applies to ImageNet and cannot ' + 'be used for CIFAR.')) + + choice_kwargs = dict( + name='resnet_size', short_name='rs', default='50', + help=flags_core.help_wrap('The size of the ResNet model to use.')) + + if resnet_size_choices is None: + flags.DEFINE_string(**choice_kwargs) + else: + flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs) diff --git a/retired_benchmarks/vision/classification_and_detection/tools/ssd-nhwc.py b/retired_benchmarks/vision/classification_and_detection/tools/ssd-nhwc.py new file mode 100644 index 000000000..f255fd965 --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/tools/ssd-nhwc.py @@ -0,0 +1,267 @@ +import os +import sys +import argparse +import tensorflow as tf +from tensorflow.core.framework import graph_pb2 + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument('pbfile') + return parser.parse_args() + +def insert_transpose(graph, a, b, to_nchw): + if not isinstance(b, list): + b = [b] + trans_perm = graph.node.add() + trans_perm.name = a.name + '/transpose/perm' + trans_perm.op = 'Const' + trans_perm.attr['dtype'].type = 3 # DT_INT32 + trans_perm.attr['value'].tensor.dtype = 3 # DT_INT32 + trans_perm.attr['value'].tensor.tensor_shape.dim.add() + trans_perm.attr['value'].tensor.tensor_shape.dim[0].size = 4 + if to_nchw: + trans_perm.attr['value'].tensor.tensor_content = b'\000\000\000\000\003\000\000\000\001\000\000\000\002\000\000\000' + else: + trans_perm.attr['value'].tensor.tensor_content = b'\000\000\000\000\002\000\000\000\003\000\000\000\001\000\000\000' + + trans = graph.node.add() + trans.name = a.name + '/transpose' + trans.op = 'Transpose' + trans.input.append(a.name) + trans.input.append(trans_perm.name) + trans.attr['T'].type = 1 + trans.attr['Tperm'].type = 3 + + for n in b: + inputs = [] + for i in n.input: + if i == a.name: + inputs.append(trans.name) + else: + inputs.append(i) + cnt = len(n.input) + for i in range(0, cnt): + del n.input[0] + for i in range(0, cnt): + n.input.append(inputs[i]) + +def convert_list_nhwc(l): + c = l.i[1] + h = l.i[2] + w = l.i[3] + l.i[1] = h + l.i[2] = w + l.i[3] = c + +def convert_conv_nhwc(node_conv): + node_conv.attr['data_format'].s = b'NHWC' + convert_list_nhwc(node_conv.attr['dilations'].list) + convert_list_nhwc(node_conv.attr['strides'].list) + +def convert_general_nhwc(node): + node.attr['data_format'].s = b'NHWC' + +def convert_mp_nhwc(node_mp): + node_mp.attr['data_format'].s = b'NHWC' + convert_list_nhwc(node_mp.attr['ksize'].list) + convert_list_nhwc(node_mp.attr['strides'].list) + +def convert_image_nhwc(node_image): + c = node_image.attr['shape'].shape.dim[1].size + del node_image.attr['shape'].shape.dim[1] + d = node_image.attr['shape'].shape.dim.add() + d.size = c + +def init_node(n): + node = {} + node['node'] = n + node['inputs'] = [] + node['outputs'] = [] + return node + +def connect_nodes(n1, n2): + if n2['node'].name not in n1['outputs']: + n1['outputs'].append(n2['node'].name) + n2['inputs'].append(n1['node'].name) + else: + print('{} -> {} already connected'.format(n1['node'].name, n2['node'].name)) + +def disconnect_nodes(n1, n2): + if n1['node'].name not in n2['inputs'] or n2['node'].name not in n1['outputs']: + print('{} -> {} not connected'.format(n1['node'].name, n2['node'].name)) + for i in range(0, len(n1['outputs'])): + if n1['outputs'][i] == n2['node'].name: + del n1['outputs'][i] + break + for i in range(0, len(n2['inputs'])): + if n2['inputs'][i] == n1['node'].name: + del n2['inputs'][i] + 
break + +def build_graph(graph): + node_map = {} + for n in graph.node: + node = init_node(n) + node_map[n.name] = node + for n in node_map: + for i in node_map[n]['node'].input: + if ':' in i: + i = i[:i.find(':')] + i = i.lstrip('^') + if i not in node_map: + print('node {} not found'.format(i)) + else: + connect_nodes(node_map[i], node_map[n]) + return node_map + +def trim_const_from_graph(node_map): + trim_list = [] + for n in node_map: + if node_map[n]['node'].op == 'Const': + trim_list.append(n) + for n in trim_list: + print('trimming {}'.format(n)) + for o in node_map[n]['outputs']: + disconnect_nodes(node_map[n], node_map[o]) + del node_map[n] + + trim_list = [] + for n in node_map: + if node_map[n]['node'].op == 'Identity' and len(node_map[n]['inputs']) == 0: + trim_list.append(n) + for n in trim_list: + print('trimming {}'.format(n)) + for o in node_map[n]['outputs']: + disconnect_nodes(node_map[n], node_map[o]) + del node_map[n] + + +def all_input_in_nhwc(n, node_map, nhwc_nodes): + for i in node_map[n]['inputs']: + if i not in nhwc_nodes: + return False + return True + +def all_output_in_nhwc(n, node_map, nhwc_nodes): + for o in node_map[n]['outputs']: + if o not in nhwc_nodes: + return False + return True + +def find_nhwc_region(node_map): + transpose_nhwc_nodes = {} + transpose_nchw_nodes = {} + nhwc_nodes = [] + + transpose_nhwc_nodes_append_list = [] + transpose_nchw_nodes_append_list = [] + for n in node_map: + if node_map[n]['node'].op == 'Conv2D': + transpose_nhwc_nodes_append_list.append(n) + transpose_nchw_nodes_append_list.append(n) + nhwc_nodes.append(n) + for n in transpose_nhwc_nodes_append_list: + if not all_input_in_nhwc(n, node_map, nhwc_nodes): + transpose_nhwc_nodes[n] = 1 + for n in transpose_nchw_nodes_append_list: + if not all_output_in_nhwc(n, node_map, nhwc_nodes): + transpose_nchw_nodes[n] = 1 + + prev_cnt_nhwc_nodes = len(nhwc_nodes) + nhwc_op_list = ['Conv2D', 'Relu', 'FusedBatchNorm', 'MaxPool', 'BiasAdd', 'Add'] + while True: + transpose_nchw_nodes_append_list = [] + for n in transpose_nchw_nodes: + for o in node_map[n]['outputs']: + if o not in nhwc_nodes and node_map[o]['node'].op in nhwc_op_list: + if all_input_in_nhwc(o, node_map, nhwc_nodes): + nhwc_nodes.append(o) + if o not in transpose_nchw_nodes_append_list: + transpose_nchw_nodes_append_list.append(o) + + transpose_nhwc_nodes_remove_list = [] + transpose_nchw_nodes_remove_list = [] + for n in transpose_nhwc_nodes: + if (all_input_in_nhwc(n, node_map, nhwc_nodes) and + n not in transpose_nhwc_nodes_remove_list): + transpose_nhwc_nodes_remove_list.append(n) + for n in transpose_nhwc_nodes_remove_list: + del transpose_nhwc_nodes[n] + + for n in transpose_nchw_nodes: + if (all_output_in_nhwc(n, node_map, nhwc_nodes) and + n not in transpose_nchw_nodes_remove_list): + transpose_nchw_nodes_remove_list.append(n) + for n in transpose_nchw_nodes_remove_list: + del transpose_nchw_nodes[n] + + for n in transpose_nchw_nodes_append_list: + if not all_output_in_nhwc(n, node_map, nhwc_nodes): + transpose_nchw_nodes[n] = 1 + + if len(nhwc_nodes) == prev_cnt_nhwc_nodes: + break + prev_cnt_nhwc_nodes = len(nhwc_nodes) + + print('\n\nTranspose to NHWC at nodes:') + for n in transpose_nhwc_nodes: + print(' {}'.format(n)) + + print('\n\nTranspose to NCHW at nodes:') + for n in transpose_nchw_nodes: + print(' {}'.format(n)) + + return nhwc_nodes, transpose_nhwc_nodes, transpose_nchw_nodes + +def main(): + args = get_args() + + graph = graph_pb2.GraphDef() + with open(args.pbfile, 'rb') as f: + 
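+        # parse the frozen, binary GraphDef that the rewrite below patches in place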
graph.ParseFromString(f.read()) + + node_map = build_graph(graph) + trim_const_from_graph(node_map) + + nhwc_nodes, transpose_nhwc_nodes, transpose_nchw_nodes = find_nhwc_region(node_map) + + nhwc_op_list = ['Conv2D', 'Relu', 'FusedBatchNorm', 'MaxPool', 'BiasAdd', 'Add'] + for n in nhwc_nodes: + if node_map[n]['node'].op == 'Conv2D': + convert_conv_nhwc(node_map[n]['node']) + elif node_map[n]['node'].op in ['FusedBatchNorm', 'BiasAdd']: + convert_general_nhwc(node_map[n]['node']) + elif node_map[n]['node'].op == 'MaxPool': + convert_mp_nhwc(node_map[n]['node']) + + done_nhwc = False + if len(transpose_nhwc_nodes) == 1: + for n in transpose_nhwc_nodes: + if len(node_map[n]['inputs']) == 1 and node_map[n]['inputs'][0] == 'image': + image_outputs = [] + for o in node_map['image']['outputs']: + if o != n: + image_outputs.append(node_map[o]['node']) + insert_transpose(graph, node_map['image']['node'], image_outputs, True) + convert_image_nhwc(node_map['image']['node']) + done_nhwc = True + + if not done_nhwc: + for n in transpose_nhwc_nodes: + for i in node_map[n]['inputs']: + if i not in nhwc_nodes: + insert_transpose(graph, node_map[i]['node'], node_map[n]['node'], False) + + for n in transpose_nchw_nodes: + node_outputs = [] + for o in node_map[n]['outputs']: + if o not in nhwc_nodes: + node_outputs.append(node_map[o]['node']) + insert_transpose(graph, node_map[n]['node'], node_outputs, True) + + with open(args.pbfile+'.patch', 'wb') as f: + f.write(graph.SerializeToString()) + +if __name__ == '__main__': + main() + diff --git a/retired_benchmarks/vision/classification_and_detection/user.conf b/retired_benchmarks/vision/classification_and_detection/user.conf new file mode 100644 index 000000000..545569c1a --- /dev/null +++ b/retired_benchmarks/vision/classification_and_detection/user.conf @@ -0,0 +1,6 @@ +# Please set these fields depending on the performance of your system to +# override default LoadGen settings. +*.SingleStream.target_latency = 10 +*.Server.target_qps = 1.0 +*.Offline.target_qps = 1.0 +*.MultiStream.samples_per_query = 4 \ No newline at end of file
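
For orientation, the lines below are a minimal sketch of loading the NHWC-patched graph that ssd-nhwc.py writes. The file name ssd_resnet34.pb.patch is an assumption (the tool simply appends ".patch" to whatever pbfile it is given), and the tensor names are taken from the TF ssd_resnet34 entries in convert-to-onnx.sh above; this is not part of the diff itself.

# Loading sketch (TF1-style API, consistent with the tools in this directory).
import tensorflow as tf
from tensorflow.core.framework import graph_pb2

graph_def = graph_pb2.GraphDef()
with open("ssd_resnet34.pb.patch", "rb") as f:   # assumed output name of ssd-nhwc.py
    graph_def.ParseFromString(f.read())

graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name="")

# Tensor names follow the TF ssd_resnet34 conversion in convert-to-onnx.sh.
image_in = graph.get_tensor_by_name("image:0")
outputs = [graph.get_tensor_by_name(n + ":0")
           for n in ("detection_bboxes", "detection_scores", "detection_classes")]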