From a4228df1b9e2e896b6740a9098595d0663357bb1 Mon Sep 17 00:00:00 2001 From: yuwenzho Date: Thu, 16 Mar 2023 09:00:37 +0800 Subject: [PATCH] Migrate onnx nlp and obj examples into 2.x API (#579) Signed-off-by: yuwenzho --- .../scripts/codeScan/pyspelling/inc_dict.txt | 3 + examples/.config/model_params_onnxrt.json | 328 +++++---- examples/README.md | 134 +++- .../bert/quantization/ptq_dynamic/README.md | 59 ++ .../bert/quantization/ptq_dynamic/export.py | 64 ++ .../nlp/bert/quantization/ptq_dynamic/main.py | 411 +++++++++++ .../quantization/ptq_dynamic/prepare_data.sh | 34 + .../quantization/ptq_dynamic/prepare_model.sh | 41 ++ .../quantization/ptq_dynamic/requirements.txt | 7 + .../quantization/ptq_dynamic/run_benchmark.sh | 61 ++ .../quantization/ptq_dynamic/run_tuning.sh | 48 ++ .../bert/quantization/ptq_static/README.md | 69 ++ .../bert/quantization/ptq_static/export.py | 64 ++ .../nlp/bert/quantization/ptq_static/main.py | 422 +++++++++++ .../quantization/ptq_static/prepare_data.sh | 34 + .../quantization/ptq_static/prepare_model.sh | 41 ++ .../quantization/ptq_static/requirements.txt | 7 + .../quantization/ptq_static/run_benchmark.sh | 61 ++ .../quantization/ptq_static/run_tuning.sh | 53 ++ .../quantization/ptq_dynamic/README.md | 79 ++ .../quantization/ptq_dynamic/export.py | 60 ++ .../quantization/ptq_dynamic/main.py | 404 +++++++++++ .../quantization/ptq_dynamic/prepare_data.sh | 34 + .../quantization/ptq_dynamic/prepare_model.sh | 39 + .../quantization/ptq_dynamic/readme.md | 69 ++ .../quantization/ptq_dynamic/requirements.txt | 8 + .../quantization/ptq_dynamic/run_benchmark.sh | 62 ++ .../quantization/ptq_dynamic/run_tuning.sh | 49 ++ .../quantization/ptq_static/README.md | 80 +++ .../quantization/ptq_static/export.py | 60 ++ .../quantization/ptq_static/main.py | 414 +++++++++++ .../quantization/ptq_static/prepare_data.sh | 34 + .../quantization/ptq_static/prepare_model.sh | 39 + .../quantization/ptq_static/readme.md | 70 ++ .../quantization/ptq_static/requirements.txt | 8 + .../quantization/ptq_static/run_benchmark.sh | 62 ++ .../quantization/ptq_static/run_tuning.sh | 53 ++ .../quantization/ptq_dynamic/README.md | 13 +- .../quantization/ptq_dynamic/export.py | 58 +- .../quantization/ptq_dynamic/main.py | 39 +- .../quantization/ptq_dynamic/requirements.txt | 5 +- .../quantization/ptq_dynamic/run_benchmark.sh | 12 +- .../quantization/ptq_dynamic/run_tuning.sh | 18 +- .../quantization/ptq_dynamic/utils_model.py | 8 +- .../quantization/ptq_static/README.md | 10 +- .../quantization/ptq_static/export.py | 3 +- .../quantization/ptq_static/main.py | 45 +- .../quantization/ptq_static/requirements.txt | 5 +- .../quantization/ptq_static/run_benchmark.sh | 4 +- .../quantization/ptq_static/run_tuning.sh | 12 +- .../quantization/ptq_static/utils_model.py | 8 +- .../quantization/ptq_dynamic/README.md | 7 +- .../quantization/ptq_dynamic/export.py | 16 +- .../quantization/ptq_dynamic/main.py | 37 +- .../quantization/ptq_dynamic/requirements.txt | 2 +- .../quantization/ptq_dynamic/run_benchmark.sh | 22 +- .../quantization/ptq_dynamic/run_tuning.sh | 32 +- .../quantization/ptq_static/README.md | 13 +- .../quantization/ptq_static/main.py | 47 +- .../quantization/ptq_static/requirements.txt | 2 +- .../quantization/ptq_static/run_benchmark.sh | 30 +- .../quantization/ptq_static/run_tuning.sh | 48 +- .../quantization/ptq_dynamic/README.md | 82 +++ .../quantization/ptq_dynamic/export.py | 61 ++ .../quantization/ptq_dynamic/main.py | 412 +++++++++++ .../quantization/ptq_dynamic/prepare_data.sh | 34 + 
.../quantization/ptq_dynamic/prepare_model.sh | 39 + .../quantization/ptq_dynamic/requirements.txt | 7 + .../quantization/ptq_dynamic/run_benchmark.sh | 62 ++ .../quantization/ptq_dynamic/run_tuning.sh | 49 ++ .../quantization/ptq_static/README.md | 83 +++ .../quantization/ptq_static/export.py | 61 ++ .../quantization/ptq_static/main.py | 422 +++++++++++ .../quantization/ptq_static/prepare_data.sh | 34 + .../quantization/ptq_static/prepare_model.sh | 39 + .../quantization/ptq_static/requirements.txt | 7 + .../quantization/ptq_static/run_benchmark.sh | 62 ++ .../quantization/ptq_static/run_tuning.sh | 53 ++ .../{ptq => ptq_dynamic}/README.md | 14 +- .../quantization/{ptq => ptq_dynamic}/main.py | 3 +- .../{ptq => ptq_dynamic}/requirements.txt | 1 + .../{ptq => ptq_dynamic}/run_benchmark.sh | 0 .../{ptq => ptq_dynamic}/run_tuning.sh | 0 .../{ptq => ptq_dynamic}/README.md | 14 +- .../quantization/{ptq => ptq_dynamic}/main.py | 1 - .../{ptq => ptq_dynamic}/requirements.txt | 0 .../{ptq => ptq_dynamic}/run_benchmark.sh | 0 .../{ptq => ptq_dynamic}/run_onnx_squad.py | 0 .../{ptq => ptq_dynamic}/run_tuning.sh | 0 .../{ptq => ptq_dynamic}/squad_evaluate.py | 0 .../{ptq => ptq_dynamic}/tokenization.py | 0 .../{ptq => ptq_dynamic}/README.md | 17 +- .../{ptq => ptq_dynamic}/export.py | 0 .../quantization/{ptq => ptq_dynamic}/gpt2.py | 65 +- .../{ptq => ptq_dynamic}/requirements.txt | 1 + .../{ptq => ptq_dynamic}/run_benchmark.sh | 5 +- .../{ptq => ptq_dynamic}/run_tuning.sh | 3 +- .../{ptq => ptq_dynamic}/README.md | 19 +- .../quantization/ptq_dynamic/main.py | 163 +++++ .../{ptq => ptq_dynamic}/requirements.txt | 1 + .../quantization/ptq_dynamic/run_benchmark.sh | 42 ++ .../{ptq => ptq_dynamic}/run_onnx_squad.py | 4 +- .../quantization/ptq_dynamic}/run_tuning.sh | 8 +- .../{ptq => ptq_dynamic}/squad_evaluate.py | 0 .../{ptq => ptq_dynamic}/tokenization.py | 0 .../quantization/ptq_static/README.md | 63 ++ .../quantization/{ptq => ptq_static}/main.py | 13 +- .../quantization/ptq_static/requirements.txt | 8 + .../quantization/ptq_static/run_benchmark.sh | 42 ++ .../quantization/ptq_static/run_onnx_squad.py | 581 +++++++++++++++ .../{ptq => ptq_static}/run_tuning.sh | 0 .../quantization/ptq_static/squad_evaluate.py | 108 +++ .../quantization/ptq_static/tokenization.py | 399 +++++++++++ .../quantization/ptq_dynamic/README.md | 79 ++ .../quantization/ptq_dynamic/export.py | 61 ++ .../roberta/quantization/ptq_dynamic/main.py | 412 +++++++++++ .../quantization/ptq_dynamic/prepare_data.sh | 34 + .../quantization/ptq_dynamic/prepare_model.sh | 41 ++ .../quantization/ptq_dynamic/requirements.txt | 7 + .../quantization/ptq_dynamic/run_benchmark.sh | 62 ++ .../quantization/ptq_dynamic/run_tuning.sh | 49 ++ .../roberta/quantization/ptq_static/README.md | 82 +++ .../roberta/quantization/ptq_static/export.py | 61 ++ .../roberta/quantization/ptq_static/main.py | 422 +++++++++++ .../quantization/ptq_static/prepare_data.sh | 34 + .../quantization/ptq_static/prepare_model.sh | 41 ++ .../quantization/ptq_static/requirements.txt | 7 + .../quantization/ptq_static/run_benchmark.sh | 62 ++ .../quantization/ptq_static/run_tuning.sh | 53 ++ .../{ptq => ptq_static}/README.md | 21 +- .../{ptq => ptq_static}/cityscapes_labels.py | 0 .../quantization/{ptq => ptq_static}/main.py | 24 +- .../{ptq => ptq_static}/requirements.txt | 0 .../{ptq => ptq_static}/run_benchmark.sh | 3 +- .../DUC/quantization/ptq_static/run_tuning.sh | 43 ++ .../quantization/ptq_static/README.md | 71 ++ .../quantization/ptq_static/coco_label_map.py | 103 +++ 
.../quantization/ptq_static/coco_tools.py | 672 ++++++++++++++++++ .../quantization/ptq_static/label_map.yaml | 80 +++ .../quantization/ptq_static/main.py | 398 +++++++++++ .../quantization/ptq_static/requirements.txt | 4 + .../quantization/ptq_static/run_benchmark.sh | 49 ++ .../quantization/ptq_static/run_tuning.sh | 51 ++ .../quantization/ptq_static/README.md | 71 ++ .../quantization/ptq_static/coco_label_map.py | 103 +++ .../quantization/ptq_static/coco_tools.py | 672 ++++++++++++++++++ .../quantization/ptq_static/label_map.yaml | 80 +++ .../mask_rcnn/quantization/ptq_static/main.py | 397 +++++++++++ .../quantization/ptq_static/requirements.txt | 4 + .../quantization/ptq_static/run_benchmark.sh | 49 ++ .../quantization/ptq_static/run_tuning.sh | 51 ++ .../ssd/quantization/ptq_static/README.md | 71 ++ .../quantization/ptq_static/coco_label_map.py | 103 +++ .../ssd/quantization/ptq_static/coco_tools.py | 672 ++++++++++++++++++ .../ssd/quantization/ptq_static/data_utils.py | 471 ++++++++++++ .../quantization/ptq_static/label_map.yaml | 80 +++ .../ssd/quantization/ptq_static/main.py | 161 +++++ .../quantization/ptq_static/requirements.txt | 6 + .../quantization/ptq_static/run_benchmark.sh | 49 ++ .../ssd/quantization/ptq_static/run_tuning.sh | 51 ++ .../quantization/ptq_static/README.md | 71 ++ .../quantization/ptq_static/coco_label_map.py | 103 +++ .../quantization/ptq_static/coco_tools.py | 672 ++++++++++++++++++ .../quantization/ptq_static/data_utils.py | 470 ++++++++++++ .../quantization/ptq_static/label_map.yaml | 80 +++ .../quantization/ptq_static/main.py | 153 ++++ .../quantization/ptq_static/requirements.txt | 4 + .../quantization/ptq_static/run_benchmark.sh | 51 ++ .../quantization/ptq_static/run_tuning.sh | 51 ++ .../quantization/ptq_static/README.md | 62 ++ .../quantization/ptq_static/coco_label_map.py | 103 +++ .../quantization/ptq_static/coco_tools.py | 672 ++++++++++++++++++ .../quantization/ptq_static/label_map.yaml | 80 +++ .../quantization/ptq_static/main.py | 401 +++++++++++ .../quantization/ptq_static/requirements.txt | 4 + .../quantization/ptq_static/run_benchmark.sh | 51 ++ .../quantization/ptq_static/run_tuning.sh | 51 ++ .../yolov3/quantization/ptq_static/README.md | 61 ++ .../quantization/ptq_static/coco_label_map.py | 103 +++ .../quantization/ptq_static/coco_tools.py | 672 ++++++++++++++++++ .../quantization/ptq_static/label_map.yaml | 80 +++ .../yolov3/quantization/ptq_static/main.py | 403 +++++++++++ .../quantization/ptq_static/requirements.txt | 4 + .../quantization/ptq_static/run_benchmark.sh | 51 ++ .../quantization/ptq_static/run_tuning.sh | 51 ++ .../yolov4/quantization/ptq_static/README.md | 61 ++ .../quantization/ptq_static/coco_label_map.py | 103 +++ .../quantization/ptq_static/coco_tools.py | 672 ++++++++++++++++++ .../quantization/ptq_static/label_map.yaml | 80 +++ .../yolov4/quantization/ptq_static/main.py | 546 ++++++++++++++ .../quantization/ptq_static/requirements.txt | 4 + .../quantization/ptq_static/run_benchmark.sh | 51 ++ .../quantization/ptq_static/run_tuning.sh | 51 ++ .../ptq_static/yolov4_anchors.txt | 1 + .../quantization/ptq_static/README.md | 71 ++ .../quantization/ptq_static/coco_label_map.py | 103 +++ .../quantization/ptq_static/coco_tools.py | 672 ++++++++++++++++++ .../quantization/ptq_static/data_utils.py | 470 ++++++++++++ .../quantization/ptq_static/main.py | 151 ++++ .../quantization/ptq_static/readme.md | 47 ++ .../quantization/ptq_static/requirements.txt | 6 + .../quantization/ptq_static}/run_benchmark.sh | 1 - 
.../quantization/ptq_static/run_tuning.sh | 43 ++ .../quantization/ptq_static/README.md | 72 ++ .../quantization/ptq_static/coco_label_map.py | 103 +++ .../quantization/ptq_static/coco_tools.py | 672 ++++++++++++++++++ .../quantization/ptq_static/data_utils.py | 470 ++++++++++++ .../quantization/ptq_static/main.py | 152 ++++ .../quantization/ptq_static/requirements.txt | 6 + .../quantization/ptq_static/run_benchmark.sh | 44 ++ .../quantization/ptq_static/run_tuning.sh | 43 ++ neural_compressor/adaptor/onnxrt.py | 5 +- neural_compressor/config.py | 2 +- neural_compressor/model/onnx_model.py | 24 +- test/config/test_config.py | 2 +- 215 files changed, 22040 insertions(+), 452 deletions(-) create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_dynamic/export.py create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_dynamic/main.py create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_data.sh create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_model.sh create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_dynamic/requirements.txt create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_tuning.sh create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_static/export.py create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_data.sh create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_model.sh create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_static/run_benchmark.sh create mode 100644 examples/onnxrt/nlp/bert/quantization/ptq_static/run_tuning.sh create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/README.md create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/export.py create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/main.py create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_data.sh create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_model.sh create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/readme.md create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/requirements.txt create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_benchmark.sh create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_tuning.sh create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_static/export.py create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_data.sh create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_model.sh create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_static/readme.md create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_benchmark.sh create mode 100644 
examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_tuning.sh create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/README.md create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/export.py create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/main.py create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_data.sh create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_model.sh create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/requirements.txt create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_benchmark.sh create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_tuning.sh create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_static/export.py create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_data.sh create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_model.sh create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_benchmark.sh create mode 100644 examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_tuning.sh rename examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/{ptq => ptq_dynamic}/README.md (83%) rename examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/{ptq => ptq_dynamic}/main.py (98%) rename examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/{ptq => ptq_dynamic}/requirements.txt (94%) rename examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/{ptq => ptq_dynamic}/run_benchmark.sh (100%) rename examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/{ptq => ptq_dynamic}/run_tuning.sh (100%) rename examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/{ptq => ptq_dynamic}/README.md (85%) rename examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/{ptq => ptq_dynamic}/main.py (99%) rename examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/{ptq => ptq_dynamic}/requirements.txt (100%) rename examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/{ptq => ptq_dynamic}/run_benchmark.sh (100%) rename examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/{ptq => ptq_dynamic}/run_onnx_squad.py (100%) rename examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/{ptq => ptq_dynamic}/run_tuning.sh (100%) rename examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/{ptq => ptq_dynamic}/squad_evaluate.py (100%) rename examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/{ptq => ptq_dynamic}/tokenization.py (100%) rename examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/{ptq => ptq_dynamic}/README.md (77%) rename examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/{ptq => ptq_dynamic}/export.py (100%) rename examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/{ptq => ptq_dynamic}/gpt2.py (86%) rename examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/{ptq => ptq_dynamic}/requirements.txt (95%) rename examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/{ptq => ptq_dynamic}/run_benchmark.sh (88%) rename examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/{ptq => ptq_dynamic}/run_tuning.sh (88%) rename examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/{ptq => ptq_dynamic}/README.md 
(84%) create mode 100644 examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/main.py rename examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/{ptq => ptq_dynamic}/requirements.txt (97%) create mode 100644 examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_benchmark.sh rename examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/{ptq => ptq_dynamic}/run_onnx_squad.py (99%) rename examples/onnxrt/{object_detection/onnx_model_zoo/DUC/quantization/ptq => nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic}/run_tuning.sh (93%) rename examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/{ptq => ptq_dynamic}/squad_evaluate.py (100%) rename examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/{ptq => ptq_dynamic}/tokenization.py (100%) create mode 100644 examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/README.md rename examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/{ptq => ptq_static}/main.py (93%) create mode 100644 examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_benchmark.sh create mode 100644 examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_onnx_squad.py rename examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/{ptq => ptq_static}/run_tuning.sh (100%) create mode 100644 examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/squad_evaluate.py create mode 100644 examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/tokenization.py create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/README.md create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/export.py create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/main.py create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_data.sh create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_model.sh create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/requirements.txt create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_benchmark.sh create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_tuning.sh create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_static/export.py create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_data.sh create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_model.sh create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_static/run_benchmark.sh create mode 100644 examples/onnxrt/nlp/roberta/quantization/ptq_static/run_tuning.sh rename examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/{ptq => ptq_static}/README.md (65%) rename examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/{ptq => ptq_static}/cityscapes_labels.py (100%) rename examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/{ptq => ptq_static}/main.py (94%) rename examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/{ptq => ptq_static}/requirements.txt (100%) rename examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/{ptq => 
ptq_static}/run_benchmark.sh (91%) create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_tuning.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_label_map.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_tools.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/label_map.yaml create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_benchmark.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_tuning.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_label_map.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_tools.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/label_map.yaml create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_benchmark.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_tuning.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_label_map.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_tools.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/data_utils.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/label_map.yaml create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_benchmark.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_tuning.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py create mode 100644 
examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/label_map.yaml create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt create mode 100755 examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh create mode 100755 examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_tuning.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_label_map.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_tools.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/label_map.yaml create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_benchmark.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_tuning.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_label_map.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_tools.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/label_map.yaml create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_benchmark.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_tuning.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_label_map.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_tools.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/label_map.yaml create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_benchmark.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_tuning.sh create mode 100644 examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/yolov4_anchors.txt create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md create mode 
100644 examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/readme.md create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt rename examples/onnxrt/{nlp/onnx_model_zoo/mobilebert/quantization/ptq => object_detection/ssd_mobilenet_v1/quantization/ptq_static}/run_benchmark.sh (98%) create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_tuning.sh create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/README.md create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_label_map.py create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_tools.py create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/data_utils.py create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/main.py create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/requirements.txt create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_benchmark.sh create mode 100644 examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_tuning.sh diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt index 4a363df0f09..81985a1167d 100644 --- a/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt +++ b/.azure-pipelines/scripts/codeScan/pyspelling/inc_dict.txt @@ -2539,4 +2539,7 @@ zalandoresearch emCgSTlJaAg matsubara yoshitomo +deepset +FAC +electra parallelizes diff --git a/examples/.config/model_params_onnxrt.json b/examples/.config/model_params_onnxrt.json index f1447a3dce3..88308521b57 100644 --- a/examples/.config/model_params_onnxrt.json +++ b/examples/.config/model_params_onnxrt.json @@ -28,67 +28,53 @@ "new_benchmark": true }, "ssd_mobilenet_v1": { - "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq", + "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq_static", "dataset_location": "/tf_dataset/dataset/coco_dataset/raw-data", "input_model": "/tf_dataset2/models/onnx/ssd_mobilenet_v1/ssd_mobilenet_v1_frozen.onnx", - "yaml": "ssd_mobilenet_v1.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "ssd_mobilenet_v2": { - "model_src_dir": "object_detection/ssd_mobilenet_v2/quantization/ptq", + "model_src_dir": "object_detection/ssd_mobilenet_v2/quantization/ptq_static", "dataset_location": "/tf_dataset/dataset/coco_dataset/raw-data", "input_model": "/tf_dataset2/models/onnx/ssd_mobilenet_v2/ssd_mobilenet_v2_frozen.onnx", - "yaml": "ssd_mobilenet_v2.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, - "bert_base_MRPC_static": { - "model_src_dir": "nlp/bert/quantization/ptq", + "bert_base_MRPC": { + "model_src_dir": "nlp/bert/quantization/ptq_static", 
"dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", "input_model": "/tf_dataset2/models/onnx/bert_base_MRPC/bert.onnx", - "yaml": "bert_static.yaml", - "strategy": "basic", - "batch_size": 8, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 8 }, "bert_base_MRPC_dynamic": { - "model_src_dir": "nlp/bert/quantization/ptq", + "model_src_dir": "nlp/bert/quantization/ptq_dynamic", "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", "input_model": "/tf_dataset2/models/onnx/bert_base_MRPC/bert.onnx", - "yaml": "bert_dynamic.yaml", - "strategy": "basic", - "batch_size": 8, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 8 }, - "distilbert_base_MRPC": { - "model_src_dir": "nlp/distilbert/quantization/ptq", + "distilbert_base_MRPC_dynamic": { + "model_src_dir": "nlp/distilbert/quantization/ptq_dynamic", "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", "input_model": "/tf_dataset2/models/onnx/distilbert_base_MRPC/distilbert-base-uncased.onnx", - "yaml": "distilbert.yaml", - "strategy": "basic", - "batch_size": 8, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 8 }, - "mobilebert_MRPC": { - "model_src_dir": "nlp/mobilebert/quantization/ptq", + "mobilebert_MRPC_dynamic": { + "model_src_dir": "nlp/mobilebert/quantization/ptq_dynamic", "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", "input_model": "/tf_dataset2/models/onnx/mobilebert_MRPC/mobilebert-uncased.onnx", - "yaml": "mobilebert.yaml", - "strategy": "basic", - "batch_size": 8, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 8 }, - "roberta_base_MRPC": { - "model_src_dir": "nlp/roberta/quantization/ptq", + "roberta_base_MRPC_dynamic": { + "model_src_dir": "nlp/roberta/quantization/ptq_dynamic", "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", "input_model": "/tf_dataset2/models/onnx/roberta_base_MRPC/roberta-base.onnx", - "yaml": "roberta.yaml", - "strategy": "basic", - "batch_size": 8, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 8 }, "resnet50-v1-12": { "model_src_dir": "image_recognition/onnx_model_zoo/resnet50/quantization/ptq", @@ -118,22 +104,22 @@ "new_benchmark": true }, "bert_squad_model_zoo_dynamic": { - "model_src_dir": "nlp/onnx_model_zoo/bert-squad/quantization/ptq", + "model_src_dir": "nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic", "dataset_location": "/tf_dataset2/datasets/squad", "input_model": "/tf_dataset2/models/onnx/bert_squad/bert_squad_model_zoo.onnx", "main_script": "main.py", "batch_size": 1 }, "mobilebert_squad_mlperf_dynamic": { - "model_src_dir": "nlp/onnx_model_zoo/mobilebert/quantization/ptq", + "model_src_dir": "nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic", "dataset_location": "/tf_dataset2/datasets/squad", "input_model": "/tf_dataset2/models/onnx/mobilebert_squad/mobilebert_squad_mlperf.onnx", "main_script": "main.py", "batch_size": 1 }, "gpt2_lm_head_wikitext_model_zoo_dynamic": { - "model_src_dir": "nlp/onnx_model_zoo/gpt2/quantization/ptq", - "dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/", + "model_src_dir": "nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic", + "dataset_location": "/tf_dataset2/datasets/wikitext/wikitext-2-raw/wiki.test.raw", "input_model": "/tf_dataset2/models/onnx/gpt2/gpt2_lm_head_wikitext_model_zoo.onnx", "main_script": "gpt2.py", "batch_size": 1 @@ -227,13 +213,11 @@ "new_benchmark": true }, "ssd-12": { - "model_src_dir": "object_detection/onnx_model_zoo/ssd/quantization/ptq", + "model_src_dir": 
"object_detection/onnx_model_zoo/ssd/quantization/ptq_static", "dataset_location": "/tf_dataset/dataset/coco_dataset/raw-data", "input_model": "/tf_dataset2/models/onnx/ssd/ssd-12.onnx", - "yaml": "ssd.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "fcn": { "model_src_dir": "image_recognition/onnx_model_zoo/fcn/quantization/ptq", @@ -243,31 +227,25 @@ "batch_size": 1 }, "ssd_mobilenet_v1-2": { - "model_src_dir": "object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq", + "model_src_dir": "object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static", "dataset_location": "/tf_dataset/dataset/coco_dataset/raw-data", "input_model": "/tf_dataset2/models/onnx/ssdmb/ssd_mobilenet_v1_12.onnx", - "yaml": "ssd_mobilenet_v1.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "faster_rcnn": { - "model_src_dir": "object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq", + "model_src_dir": "object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static", "dataset_location": "/tf_dataset2/datasets/coco2017/coco/", "input_model": "/tf_dataset2/models/onnx/faster_rcnn/FasterRCNN-12.onnx", - "yaml": "faster_rcnn.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "mask_rcnn": { - "model_src_dir": "object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq", + "model_src_dir": "object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static", "dataset_location": "/tf_dataset2/datasets/coco2017/coco/", "input_model": "/tf_dataset2/models/onnx/mask_rcnn/MaskRCNN-12.onnx", - "yaml": "mask_rcnn.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "densenet": { "model_src_dir": "image_recognition/onnx_model_zoo/densenet/quantization/ptq", @@ -288,13 +266,11 @@ "new_benchmark": true }, "yolov3": { - "model_src_dir": "object_detection/onnx_model_zoo/yolov3/quantization/ptq", + "model_src_dir": "object_detection/onnx_model_zoo/yolov3/quantization/ptq_static", "dataset_location": "/tf_dataset2/datasets/coco2017/coco/", "input_model": "/tf_dataset2/models/onnx/yolov3/yolov3-12.onnx", - "yaml": "yolov3.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "resnet50_v1_5_qdq": { "model_src_dir": "image_recognition/resnet50/quantization/ptq", @@ -324,58 +300,46 @@ "new_benchmark": true }, "ssd_mobilenet_v1_qdq": { - "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq", + "model_src_dir": "object_detection/ssd_mobilenet_v1/quantization/ptq_static", "dataset_location": "/tf_dataset/dataset/coco_dataset/raw-data", "input_model": "/tf_dataset2/models/onnx/ssd_mobilenet_v1/ssd_mobilenet_v1_frozen-13.onnx", - "yaml": "ssd_mobilenet_v1_qdq.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "ssd_mobilenet_v2_qdq": { - "model_src_dir": "object_detection/ssd_mobilenet_v2/quantization/ptq", + "model_src_dir": "object_detection/ssd_mobilenet_v2/quantization/ptq_static", "dataset_location": "/tf_dataset/dataset/coco_dataset/raw-data", "input_model": "/tf_dataset2/models/onnx/ssd_mobilenet_v2/ssd_mobilenet_v2_frozen-13.onnx", - "yaml": "ssd_mobilenet_v2_qdq.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + 
"batch_size": 1 }, - "bert_base_MRPC_static_qdq": { - "model_src_dir": "nlp/bert/quantization/ptq", + "bert_base_MRPC_qdq": { + "model_src_dir": "nlp/bert/quantization/ptq_static", "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", "input_model": "/tf_dataset2/models/onnx/bert_base_MRPC/bert.onnx", - "yaml": "bert_qdq.yaml", - "strategy": "basic", - "batch_size": 8, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 8 }, "distilbert_base_MRPC_qdq": { - "model_src_dir": "nlp/distilbert/quantization/ptq", + "model_src_dir": "nlp/distilbert/quantization/ptq_static", "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", "input_model": "/tf_dataset2/models/onnx/distilbert_base_MRPC/distilbert-base-uncased.onnx", - "yaml": "distilbert_qdq.yaml", - "strategy": "basic", - "batch_size": 8, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 8 }, "mobilebert_MRPC_qdq": { - "model_src_dir": "nlp/mobilebert/quantization/ptq", + "model_src_dir": "nlp/mobilebert/quantization/ptq_static", "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", "input_model": "/tf_dataset2/models/onnx/mobilebert_MRPC/mobilebert-uncased.onnx", - "yaml": "mobilebert_qdq.yaml", - "strategy": "basic", - "batch_size": 8, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 8 }, "roberta_base_MRPC_qdq": { - "model_src_dir": "nlp/roberta/quantization/ptq", + "model_src_dir": "nlp/roberta/quantization/ptq_static", "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", "input_model": "/tf_dataset2/models/onnx/roberta_base_MRPC/roberta-base.onnx", - "yaml": "roberta_qdq.yaml", - "strategy": "basic", - "batch_size": 8, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 8 }, "resnet50-v1-12_qdq": { "model_src_dir": "image_recognition/onnx_model_zoo/resnet50/quantization/ptq", @@ -405,7 +369,7 @@ "new_benchmark": true }, "mobilebert_squad_mlperf_qdq": { - "model_src_dir": "nlp/onnx_model_zoo/mobilebert/quantization/ptq", + "model_src_dir": "nlp/onnx_model_zoo/mobilebert/quantization/ptq_static", "dataset_location": "/tf_dataset2/datasets/squad", "input_model": "/tf_dataset2/models/onnx/mobilebert_squad/mobilebert_squad_mlperf-13.onnx", "main_script": "main.py", @@ -500,13 +464,11 @@ "new_benchmark": true }, "ssd-12_qdq": { - "model_src_dir": "object_detection/onnx_model_zoo/ssd/quantization/ptq", + "model_src_dir": "object_detection/onnx_model_zoo/ssd/quantization/ptq_static", "dataset_location": "/tf_dataset/dataset/coco_dataset/raw-data", "input_model": "/tf_dataset2/models/onnx/ssd/ssd-12.onnx", - "yaml": "ssd_qdq.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "fcn_qdq": { "model_src_dir": "image_recognition/onnx_model_zoo/fcn/quantization/ptq", @@ -516,52 +478,42 @@ "batch_size": 1 }, "ssd_mobilenet_v1-2_qdq": { - "model_src_dir": "object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq", + "model_src_dir": "object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static", "dataset_location": "/tf_dataset/dataset/coco_dataset/raw-data", "input_model": "/tf_dataset2/models/onnx/ssdmb/ssd_mobilenet_v1_13.onnx", - "yaml": "ssd_mobilenet_v1_qdq.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "faster_rcnn_qdq": { - "model_src_dir": "object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq", + "model_src_dir": "object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static", 
"dataset_location": "/tf_dataset2/datasets/coco2017/coco/", "input_model": "/tf_dataset2/models/onnx/faster_rcnn/FasterRCNN-12.onnx", - "yaml": "faster_rcnn_qdq.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "mask_rcnn_qdq": { - "model_src_dir": "object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq", + "model_src_dir": "object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static", "dataset_location": "/tf_dataset2/datasets/coco2017/coco/", "input_model": "/tf_dataset2/models/onnx/mask_rcnn/MaskRCNN-12.onnx", - "yaml": "mask_rcnn_qdq.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "yolov4": { - "model_src_dir": "object_detection/onnx_model_zoo/yolov4/quantization/ptq", + "model_src_dir": "object_detection/onnx_model_zoo/yolov4/quantization/ptq_static", "dataset_location": "/tf_dataset2/datasets/coco2017/coco/", "input_model": "/tf_dataset2/models/onnx/yolov4/yolov4-12.onnx", - "yaml": "yolov4.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "tiny_yolov3": { - "model_src_dir": "object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq", + "model_src_dir": "object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static", "dataset_location": "/tf_dataset2/datasets/coco2017/coco/", "input_model": "/tf_dataset2/models/onnx/tiny_yolov3/tiny-yolov3-12.onnx", - "yaml": "tiny_yolov3.yaml", - "strategy": "basic", - "batch_size": 1, - "new_benchmark": true + "main_script": "main.py", + "batch_size": 1 }, "duc": { - "model_src_dir": "object_detection/onnx_model_zoo/DUC/quantization/ptq", + "model_src_dir": "object_detection/onnx_model_zoo/DUC/quantization/ptq_static", "dataset_location": "/tf_dataset2/datasets/leftImg8bit/val", "input_model": "/tf_dataset2/models/onnx/DUC/ResNet101-DUC-12.onnx", "main_script": "main.py", @@ -598,7 +550,7 @@ "new_benchmark": true }, "BiDAF_dynamic": { - "model_src_dir": "nlp/onnx_model_zoo/BiDAF/quantization/ptq", + "model_src_dir": "nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic", "dataset_location": "/tf_dataset2/datasets/squad/dev-v1.1.json", "input_model": "/tf_dataset2/models/onnx/BiDAF/bidaf-11.onnx", "main_script": "main.py", @@ -639,13 +591,6 @@ "main_script": "main.py", "batch_size": 8 }, - "hf_xlm-roberta-base": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_xlm-roberta-base_dynamic/xlm-roberta-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, "hf_camembert-base_dynamic": { "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", @@ -653,13 +598,6 @@ "main_script": "main.py", "batch_size": 8 }, - "hf_camembert-base": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", - "input_model": "/tf_dataset2/models/onnx/hf_camembert-base_dynamic/camembert-base-mrpc.onnx", - "main_script": "main.py", - "batch_size": 8 - }, "hf_MiniLM-L12-H384-uncased_dynamic": { "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", @@ -695,13 +633,6 @@ "main_script": "main.py", "batch_size": 8 }, - 
"hf_albert-base-v2": { - "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", - "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", - "input_model": "/tf_dataset2/models/onnx/hf_albert-base-v2_dynamic/albert-base-v2-sst2.onnx", - "main_script": "main.py", - "batch_size": 8 - }, "hf_MiniLM-L6-H384-uncased_dynamic": { "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", "dataset_location": "/tf_dataset/pytorch/glue_data/SST-2/", @@ -743,6 +674,97 @@ "input_model": "/tf_dataset2/models/onnx/hf_bert-base-multilingual-cased_dynamic/bert-base-multilingual-cased-finetuned-squad.onnx", "main_script": "main.py", "batch_size": 1 + }, + "hf_bert-base-cased_dynamic": { + "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", + "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", + "input_model": "/tf_dataset2/models/onnx/hf_bert-base-cased_dynamic/bert-base-cased-finetuned-mrpc.onnx", + "main_script": "main.py", + "batch_size": 8 + }, + "hf_bert-base-cased": { + "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", + "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", + "input_model": "/tf_dataset2/models/onnx/hf_bert-base-cased_static/bert-base-cased-finetuned-mrpc.onnx", + "main_script": "main.py", + "batch_size": 8 + }, + "hf_electra-small-discriminator_dynamic": { + "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", + "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", + "input_model": "/tf_dataset2/models/onnx/hf_electra-small-discriminator_dynamic/electra-small-discriminator-mrpc.onnx", + "main_script": "main.py", + "batch_size": 8 + }, + "hf_electra-small-discriminator": { + "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", + "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", + "input_model": "/tf_dataset2/models/onnx/hf_electra-small-discriminator_dynamic/electra-small-discriminator-mrpc.onnx", + "main_script": "main.py", + "batch_size": 8 + }, + "hf_bert-mini_dynamic": { + "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", + "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", + "input_model": "/tf_dataset2/models/onnx/hf_bert-mini_dynamic/bert-mini-finetuned-mrpc.onnx", + "main_script": "main.py", + "batch_size": 8 + }, + "hf_bert-mini": { + "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", + "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", + "input_model": "/tf_dataset2/models/onnx/hf_bert-mini_dynamic/bert-mini-finetuned-mrpc.onnx", + "main_script": "main.py", + "batch_size": 8 + }, + "hf_xlnet-base-cased_dynamic": { + "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", + "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", + "input_model": "/tf_dataset2/models/onnx/hf_xlnet-base-cased_dynamic/xlnet-base-cased-mrpc.onnx", + "main_script": "main.py", + "batch_size": 8 + }, + "hf_xlnet-base-cased": { + "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_static", + "dataset_location": "/tf_dataset/pytorch/glue_data/MRPC", + "input_model": "/tf_dataset2/models/onnx/hf_xlnet-base-cased_dynamic/xlnet-base-cased-mrpc.onnx", + "main_script": "main.py", + "batch_size": 8 + }, + "hf_bart-large_dynamic": { + "model_src_dir": "nlp/huggingface_model/text_classification/quantization/ptq_dynamic", + "dataset_location": 
"/tf_dataset/pytorch/glue_data/MRPC", + "input_model": "/tf_dataset2/models/onnx/hf_bart-large_dynamic/bart-large-mrpc-hf.onnx", + "main_script": "main.py", + "batch_size": 8 + }, + "hf_distilbert-base-uncased-distilled_dynamic": { + "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_dynamic", + "dataset_location": "/tf_dataset2/datasets/squad", + "input_model": "/tf_dataset2/models/onnx/hf_distilbert-squad_dynamic/distilbert-base-uncased-distilled-squad.onnx", + "main_script": "main.py", + "batch_size": 1 + }, + "hf_bert-large-uncased_dynamic": { + "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_dynamic", + "dataset_location": "/tf_dataset2/datasets/squad", + "input_model": "/tf_dataset2/models/onnx/hf_bert-large_dynamic/bert-large-uncased-whole-word-masking-finetuned-squad.onnx", + "main_script": "main.py", + "batch_size": 1 + }, + "hf_bert-large-uncased": { + "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_static", + "dataset_location": "/tf_dataset2/datasets/squad", + "input_model": "/tf_dataset2/models/onnx/hf_bert-large_static/bert-large-uncased-whole-word-masking-finetuned-squad.onnx", + "main_script": "main.py", + "batch_size": 1 + }, + "hf_roberta-large_dynamic": { + "model_src_dir": "nlp/huggingface_model/question_answering/quantization/ptq_dynamic", + "dataset_location": "/tf_dataset2/datasets/squad", + "input_model": "/tf_dataset2/models/onnx/hf_roberta-large_dynamic/roberta-large-squad2.onnx", + "main_script": "main.py", + "batch_size": 1 } } } diff --git a/examples/README.md b/examples/README.md index 2ffa245e0e9..73b84ffa600 100644 --- a/examples/README.md +++ b/examples/README.md @@ -953,58 +953,58 @@ Intel® Neural Compressor validated examples with multiple compression technique qlinearops - *BERT base MRPC + BERT base MRPC Natural Language Processing Post-Training Static Quantization - integerops / qdq + integerops / qdq - *BERT base MRPC + BERT base MRPC Natural Language Processing Post-Training Dynamic Quantization - integerops + integerops - *DistilBERT base MRPC + DistilBERT base MRPC Natural Language Processing Post-Training Dynamic / Static Quantization - integerops / qdq + integerops / qdq - *Mobile bert MRPC + Mobile bert MRPC Natural Language Processing Post-Training Dynamic / Static Quantization - integerops / qdq + integerops / qdq - *Roberta base MRPC + Roberta base MRPC Natural Language Processing Post-Training Dynamic / Static Quantization - integerops / qdq + integerops / qdq BERT SQuAD Natural Language Processing Post-Training Dynamic / Static Quantization - integerops / qdq + integerops GPT2 lm head WikiText Natural Language Processing Post-Training Dynamic Quantization - integerops + integerops MobileBERT SQuAD MLPerf Natural Language Processing Post-Training Dynamic / Static Quantization - integerops / qdq + integerops / qdq BiDAF Natural Language Processing Post-Training Dynamic Quantization - integerops + integerops BERT base uncased MRPC (HuggingFace) @@ -1025,17 +1025,17 @@ Intel® Neural Compressor validated examples with multiple compression technique XLM Roberta base MRPC (HuggingFace) Natural Language Processing - Post-Training Dynamic / Static Quantization + Post-Training Dynamic Quantization - integerops / qlinearops + integerops Camembert base MRPC (HuggingFace) Natural Language Processing - Post-Training Dynamic / Static Quantization + Post-Training Dynamic Quantization - integerops / qlinearops + integerops @@ -1057,19 +1057,59 @@ Intel® Neural Compressor 
validated examples with multiple compression technique Albert base v2 SST-2 (HuggingFace) Natural Language Processing + Post-Training Dynamic Quantization + + integerops + + + + MiniLM L6 H384 uncased SST-2 (HuggingFace) + Natural Language Processing Post-Training Dynamic / Static Quantization integerops / qlinearops - MiniLM L6 H384 uncased SST-2 (HuggingFace) + BERT base cased MRPC (HuggingFace) + Natural Language Processing + Post-Training Dynamic / Static Quantization + + integerops / qlinearops + + + + Electra small discriminator MRPC (HuggingFace) + Natural Language Processing + Post-Training Dynamic / Static Quantization + + integerops / qlinearops + + + + BERT mini MRPC (HuggingFace) Natural Language Processing Post-Training Dynamic / Static Quantization integerops / qlinearops + + Xlnet base cased MRPC (HuggingFace) + Natural Language Processing + Post-Training Dynamic / Static Quantization + + integerops / qlinearops + + + + BART large MRPC (HuggingFace) + Natural Language Processing + Post-Training Dynamic Quantization + + integerops + + Spanbert SQuAD (HuggingFace) Natural Language Processing @@ -1083,64 +1123,82 @@ Intel® Neural Compressor validated examples with multiple compression technique integerops / qlinearops - *SSD MobileNet V1 + DistilBert base uncased SQuAD (HuggingFace) + Natural Language Processing + Post-Training Dynamic Quantization + integerops + + + BERT large uncased whole word masking SQuAD (HuggingFace) + Natural Language Processing + Post-Training Dynamic / Static Quantization + integerops / qlinearops + + + Roberta large SQuAD v2 (HuggingFace) + Natural Language Processing + Post-Training Dynamic Quantization + integerops + + + SSD MobileNet V1 Object Detection Post-Training Static Quantization - qlinearops / qdq + qlinearops / qdq - *SSD MobileNet V2 + SSD MobileNet V2 Object Detection Post-Training Static Quantization - qlinearops / qdq + qlinearops / qdq - *SSD MobileNet V1 (ONNX Model Zoo) + SSD MobileNet V1 (ONNX Model Zoo) Object Detection Post-Training Static Quantization - qlinearops / qdq + qlinearops / qdq DUC Object Detection Post-Training Static Quantization - qlinearops + qlinearops - *Faster R-CNN + Faster R-CNN Object Detection Post-Training Static Quantization - qlinearops / qdq + qlinearops / qdq - *Mask R-CNN + Mask R-CNN Object Detection Post-Training Static Quantization - qlinearops / qdq + qlinearops / qdq - *SSD + SSD Object Detection Post-Training Static Quantization - qlinearops / qdq + qlinearops / qdq - *Tiny YOLOv3 + Tiny YOLOv3 Object Detection Post-Training Static Quantization - qlinearops + qlinearops - *YOLOv3 + YOLOv3 Object Detection Post-Training Static Quantization - qlinearops + qlinearops - *YOLOv4 + YOLOv4 Object Detection Post-Training Static Quantization - qlinearops + qlinearops Emotion FERPlus diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md new file mode 100644 index 00000000000..157990e9996 --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md @@ -0,0 +1,59 @@ +Step-by-Step +============ + +This example load a BERT model and confirm its accuracy and speed based on [GLUE data](https://gluebenchmark.com/). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. 
Prepare Dataset
+
+Download the GLUE data with the `prepare_data.sh` script:
+```shell
+export GLUE_DIR=path/to/glue_data
+export TASK_NAME=MRPC
+
+bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME
+```
+
+## 3. Prepare Model
+
+Please refer to the [Bert-GLUE_OnnxRuntime_quantization guide](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/quantization/notebooks/Bert-GLUE_OnnxRuntime_quantization.ipynb) for details on model export.
+
+Run the `prepare_model.sh` script.
+
+Usage:
+```shell
+bash prepare_model.sh --input_dir=./MRPC \
+                      --task_name=$TASK_NAME \
+                      --output_model=path/to/model # model path as *.onnx
+```
+
+# Run
+
+## 1. Quantization
+
+Dynamic quantization:
+
+```bash
+# --input_model and --output_model take *.onnx model paths
+bash run_tuning.sh --input_model=path/to/model \
+                   --output_model=path/to/model_tune \
+                   --dataset_location=path/to/glue_data
+```
+
+## 2. Benchmark
+
+```bash
+# --input_model takes an *.onnx model path; --mode is performance or accuracy
+bash run_benchmark.sh --input_model=path/to/model \
+                      --dataset_location=path/to/glue_data \
+                      --batch_size=batch_size \
+                      --mode=performance
+```
diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/export.py b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/export.py
new file mode 100644
index 00000000000..8211442bf0d
--- /dev/null
+++ b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/export.py
@@ -0,0 +1,64 @@
+import argparse
+
+import torch
+from transformers import BertForSequenceClassification
+
+def export_onnx_model(args, model, onnx_model_path):
+    with torch.no_grad():
+        inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64),
+                  'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64),
+                  'token_type_ids': torch.ones(1, args.max_len, dtype=torch.int64)}
+        outputs = model(**inputs)
+
+        symbolic_names = {0: 'batch_size', 1: 'max_seq_len'}
+        torch.onnx.export(model,                       # model being run
+                          (inputs['input_ids'],
+                           inputs['attention_mask'],
+                           inputs['token_type_ids']),  # model input (or a tuple for multiple inputs)
+                          onnx_model_path,             # where to save the model (can be a file or file-like object)
+                          opset_version=11,            # the ONNX version to export the model
+                          do_constant_folding=True,    # whether to execute constant folding
+                          input_names=['input_ids',    # the model's input names
+                                       'input_mask',
+                                       'segment_ids'],
+                          output_names=['output'],     # the model's output names
+                          dynamic_axes={'input_ids': symbolic_names,  # variable length axes
+                                        'input_mask': symbolic_names,
+                                        'segment_ids': symbolic_names})
+    print("ONNX Model exported to {0}".format(onnx_model_path))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description='Export bert onnx model',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        '--input_dir',
+        type=str,
+        help='input_dir of bert model, must contain config.json')
+    parser.add_argument(
+        '--task_name',
+        type=str,
+        choices=["MRPC", "MNLI"],
+        help='task name of bert model')
+    parser.add_argument(
+        '--max_len',
+        type=int,
+        default=128,
+        help='maximum length of the sentence pairs')
+    parser.add_argument(
+        '--do_lower_case',
+        type=bool,
+        default=True,
+        help='whether to lowercase the input when tokenizing')
+    parser.add_argument(
+        '--output_model',
+        type=str,
+        default='bert.onnx',
+        help='path to exported model file')
+    args = parser.parse_args()
+
+    model = BertForSequenceClassification.from_pretrained(args.input_dir)
+    export_onnx_model(args, model, args.output_model)
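For context on what `run_tuning.sh` drives after this migration: the 2.x examples call the `neural_compressor` quantization entry point directly instead of loading the old per-model YAML configs removed above. The following is a minimal, illustrative sketch of that flow for the dynamic BERT case, assuming the 2.x `PostTrainingQuantConfig`/`quantization.fit` API; the model paths and the `eval_func` below are placeholders, not code from this patch.

```python
# Illustrative sketch only -- paths and eval_func are placeholders, not part of the patch.
from neural_compressor import quantization
from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig

def eval_func(model):
    # Placeholder: return a float metric (e.g. MRPC accuracy) for the candidate
    # model so the tuner can decide whether an int8 config meets the criterion.
    return 1.0

conf = PostTrainingQuantConfig(
    approach="dynamic",                                         # weights quantized offline, activations at runtime
    accuracy_criterion=AccuracyCriterion(tolerable_loss=0.01),  # tolerate ~1% relative metric loss
)

q_model = quantization.fit(
    model="bert.onnx",    # FP32 model produced by export.py (assumed path)
    conf=conf,
    eval_func=eval_func,
)
q_model.save("bert_dynamic_int8.onnx")
```

The benchmark path is analogous: in the 2.x API it goes through `neural_compressor.benchmark.fit` with a `BenchmarkConfig`, which is what `run_benchmark.sh` wraps.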
b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/main.py new file mode 100644 index 00000000000..12cb3fcf1f4 --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/main.py @@ -0,0 +1,411 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint:disable=redefined-outer-name,logging-format-interpolation + +import logging +import argparse +import os +import onnx +import onnxruntime +import transformers +import torch +import numpy as np +from dataclasses import dataclass +from typing import List, Optional, Union +from neural_compressor.data.dataloaders.onnxrt_dataloader import DefaultDataLoader + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) + +class ONNXRTBertDataset: + """Dataset used for model Bert. + Args: data_dir (str): The input data dir. + model_name_or_path (str): Path to pre-trained student model or shortcut name, + selected in the list: + max_seq_length (int, default=128): The maximum length after tokenization. + Sequences longer than this will be truncated, + sequences shorter will be padded. + do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. + task (str, default=mrpc): The name of the task to fine-tune. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. + model_type (str, default='bert'): model type, support 'distilbert', 'bert', + 'mobilebert', 'roberta'. + dynamic_length (bool, default=False): Whether to use fixed sequence length. + evaluate (bool, default=True): Whether do evaluation or training. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ + do_lower_case=True, task='mrpc', model_type='bert', dynamic_length=False,\ + evaluate=True, transform=None, filter=None): + self.inputs = [inp.name for inp in onnx.load(model).graph.input] + task = task.lower() + model_type = model_type.lower() + assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ + 'mnli', 'wnli', 'sst-2'], 'Unsupported task type' + assert model_type in ['distilbert', 'bert', 'mobilebert', 'roberta'], 'Unsupported \ + model type' + self.dynamic_length = dynamic_length + self.model_type = model_type + self.max_seq_length = max_seq_length + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, + do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ + max_seq_length, task, model_type, tokenizer, evaluate) + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) + return batch[:len(self.inputs)], batch[-1] + +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ + model_type, tokenizer, evaluate): + from torch.utils.data import TensorDataset + + processor = transformers.glue_processors[task]() + output_mode = transformers.glue_output_modes[task] + # Load data features from cache or dataset file + if not os.path.exists("./dataset_cached"): + os.makedirs("./dataset_cached") + cached_features_file = os.path.join("./dataset_cached", 'cached_{}_{}_{}_{}'.format( + 'dev' if evaluate else 'train', + list(filter(None, model_name_or_path.split('/'))).pop(), + str(max_seq_length), + str(task))) + if os.path.exists(cached_features_file): + logger.info("Load features from cached file {}.".format(cached_features_file)) + features = torch.load(cached_features_file) + else: + logger.info("Create features from dataset file at {}.".format(data_dir)) + label_list = processor.get_labels() + examples = processor.get_dev_examples(data_dir) if evaluate else \ + processor.get_train_examples(data_dir) + features = convert_examples_to_features(examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, + ) + logger.info("Save features into cached file {}.".format(cached_features_file)) + torch.save(features, cached_features_file) + # Convert to Tensors and build dataset + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) + all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) + all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) + if output_mode == "classification": + all_labels = torch.tensor([f.label for f in features], dtype=torch.long) + elif output_mode == "regression": + all_labels = torch.tensor([f.label for f in features], dtype=torch.float) + dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ + all_seq_lengths, all_labels) + return dataset + +def convert_examples_to_features( + examples, + tokenizer, + max_length=128, + task=None, + label_list=None, + output_mode="classification", + pad_token=0, + pad_token_segment_id=0, + mask_padding_with_zero=True, +): + processor = transformers.glue_processors[task]() + if label_list is None: + label_list = processor.get_labels() + logger.info("Use label 
list {} for task {}.".format(label_list, task)) + label_map = {label: i for i, label in enumerate(label_list)} + features = [] + for (ex_index, example) in enumerate(examples): + inputs = tokenizer.encode_plus( + example.text_a, + example.text_b, + add_special_tokens=True, + max_length=max_length, + return_token_type_ids=True, + truncation=True, + ) + input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) + + # Zero-pad up to the sequence length. + seq_length = len(input_ids) + padding_length = max_length - len(input_ids) + + input_ids = input_ids + ([pad_token] * padding_length) + attention_mask = attention_mask + \ + ([0 if mask_padding_with_zero else 1] * padding_length) + token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) + + assert len(input_ids) == max_length, \ + "Error with input_ids length {} vs {}".format( + len(input_ids), max_length) + assert len(attention_mask) == max_length, \ + "Error with attention_mask length {} vs {}".format( + len(attention_mask), max_length + ) + assert len(token_type_ids) == max_length, \ + "Error with token_type_ids length {} vs {}".format( + len(token_type_ids), max_length + ) + if output_mode == "classification": + label = label_map[example.label] + elif output_mode == "regression": + label = float(example.label) + else: + raise KeyError(output_mode) + + feats = InputFeatures( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + label=label, + seq_length=seq_length, + ) + features.append(feats) + return features + +@dataclass(frozen=True) +class InputFeatures: + """ + A single set of features of data. + Property names are the same names as the corresponding inputs to a model. + Args: + input_ids: Indices of input sequence tokens in the vocabulary. + attention_mask: Mask to avoid performing attention on padding token indices. + Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, + ``0`` for MASKED (padded) tokens. + token_type_ids: (Optional) Segment token indices to indicate first and second + portions of the inputs. Only some models use them. + label: (Optional) Label corresponding to the input. Int for classification problems, + float for regression problems. + seq_length: (Optional) The length of input sequence before padding. + """ + + input_ids: List[int] + attention_mask: Optional[List[int]] = None + token_type_ids: Optional[List[int]] = None + label: Optional[Union[int, float]] = None + seq_length: Optional[List[int]] = None + +class ONNXRTGLUE: + """Computes GLUE score. + + Args: + task (str, default=mrpc): The name of the task. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. 
+ + """ + def __init__(self, task='mrpc'): + assert task in ['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ + 'mnli', 'wnli', 'sst-2'], 'Unsupported task type' + self.pred_list = None + self.label_list = None + self.task = task + self.return_key = { + "cola": "mcc", + "mrpc": "acc", + "sts-b": "corr", + "qqp": "acc", + "mnli": "mnli/acc", + "qnli": "acc", + "rte": "acc", + "wnli": "acc", + "sst-2": "acc" + } + + def update(self, preds, labels): + """add preds and labels to storage""" + if isinstance(preds, list) and len(preds) == 1: + preds = preds[0] + if isinstance(labels, list) and len(labels) == 1: + labels = labels[0] + if self.pred_list is None: + self.pred_list = preds + self.label_list = labels + else: + self.pred_list = np.append(self.pred_list, preds, axis=0) + self.label_list = np.append(self.label_list, labels, axis=0) + + def reset(self): + """clear preds and labels storage""" + self.pred_list = None + self.label_list = None + + def result(self): + """calculate metric""" + output_mode = transformers.glue_output_modes[self.task] + + if output_mode == "classification": + processed_preds = np.argmax(self.pred_list, axis=1) + elif output_mode == "regression": + processed_preds = np.squeeze(self.pred_list) + result = transformers.glue_compute_metrics(\ + self.task, processed_preds, self.label_list) + return result[self.return_key[self.task]] + +if __name__ == "__main__": + logger.info('Evaluating ONNXRuntime full precision accuracy and performance:') + parser = argparse.ArgumentParser( + description='BERT fine-tune examples for classification/regression tasks.', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--model_path', + type=str, + help="Pre-trained resnet50 model on onnx file" + ) + parser.add_argument( + '--benchmark', + action='store_true', \ + default=False + ) + parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether quantize the model" + ) + parser.add_argument( + '--output_model', + type=str, + help="output model path" + ) + parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" + ) + parser.add_argument( + '--model_name_or_path', + type=str, + help="pretrained model name or path" + ) + parser.add_argument( + '--data_path', + type=str, + help="input data path" + ) + parser.add_argument( + '--batch_size', + default=8, + type=int, + ) + parser.add_argument( + '--task', + type=str, + default='mrpc', + choices=['mrpc', 'qqp', 'qnli', 'rte', 'sts-b', 'cola', \ + 'mnli', 'wnli', 'sst-2'], + help="GLUE task name" + ) + parser.add_argument( + "--dynamic_length", + type=bool, + default=False, + help="dynamic length" + ) + parser.add_argument( + "--max_seq_length", + type=int, + default=128, + help="max sequence length" + ) + parser.add_argument( + "--model_type", + type=str, + default="bert", + choices=["distilbert", "bert", "mobilebert", "roberta"], + help="model type" + ) + args = parser.parse_args() + + dataset = ONNXRTBertDataset(args.model_path, + data_dir=args.data_path, + model_name_or_path=args.model_name_or_path, + max_seq_length=args.max_seq_length, + task=args.task, + model_type=args.model_type, + dynamic_length=args.dynamic_length) + dataloader = DefaultDataLoader(dataset, args.batch_size) + metric = ONNXRTGLUE(args.task) + + def eval_func(model): + metric.reset() + session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names 
= [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + inputs = inputs[:len_inputs] + for i in range(len_inputs): + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + metric.update(predictions[0], labels) + return metric.result() + + if args.benchmark: + model = onnx.load(args.model_path) + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == 'accuracy': + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + if onnxruntime.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions('bert') + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + args.model_path, + 'bert', + num_heads=12, + hidden_size=768, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(args.model_path) + + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach='dynamic') + q_model = quantization.fit(model, + config, + eval_func=eval_func) + q_model.save(args.output_model) diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_data.sh b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_data.sh new file mode 100644 index 00000000000..8e434a5c521 --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + download_data + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --data_dir=*) + data_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function download_data { + wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py + python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_model.sh b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_model.sh new file mode 100644 index 00000000000..33ae9a1fdde --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_model.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + export_model + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_dir=*) + input_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + --max_len=*) + max_len=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function export_model { + curl https://download.pytorch.org/tutorial/MRPC.zip --output MPRC.zip + unzip -n MPRC.zip + python export.py --input_dir ${input_dir} --task_name ${task_name} --output_model ${output_model} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/requirements.txt 
b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/requirements.txt new file mode 100644 index 00000000000..1fb753da72e --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/requirements.txt @@ -0,0 +1,7 @@ +torch +transformers +onnx +onnxruntime +coloredlogs +sympy +onnxruntime-extensions; python_version < '3.10' diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh new file mode 100644 index 00000000000..d71c0a908db --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [[ ${mode} == "accuracy" ]]; then + dynamic_length=False + elif [[ ${mode} == "performance" ]]; then + dynamic_length=True + else + echo "Error: No such mode: ${mode}" + exit 1 + fi + + model_name_or_path="bert-base-uncased" + task_name="mrpc" + model_type="bert" + + python main.py \ + --model_path ${input_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --mode ${mode} \ + --dynamic_length ${dynamic_length} \ + --benchmark + +} + +main "$@" + diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_tuning.sh b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_tuning.sh new file mode 100644 index 00000000000..6876ddc509c --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_tuning.sh @@ -0,0 +1,48 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + model_name_or_path="bert-base-uncased" + batch_size=8 + task_name="mrpc" + model_type="bert" + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --tune +} + +main "$@" + + + diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/README.md b/examples/onnxrt/nlp/bert/quantization/ptq_static/README.md new file mode 100644 index 00000000000..711159e1bb4 --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_static/README.md @@ -0,0 +1,69 @@ +Step-by-Step +============ + +This example load a BERT model and confirm its accuracy and speed based on [GLUE data](https://gluebenchmark.com/). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Dataset + +download the GLUE data with `prepare_data.sh` script. 
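The GLUE data downloaded in this step is used both for accuracy evaluation and, in the static case, for calibration. As a point of reference before the preparation steps, the static post-training quantization performed later by `main.py` reduces to roughly the following 2.x API usage; this is a minimal sketch, where `model`, `dataloader`, and `eval_func` are placeholders for the ONNX model, calibration dataloader, and GLUE evaluation function that the script constructs:

```python
# Minimal sketch of the static PTQ call used later in main.py (2.x API).
# `model`, `dataloader`, and `eval_func` are placeholders for objects the
# script builds from the exported ONNX model and the downloaded GLUE data.
from neural_compressor import PostTrainingQuantConfig, quantization

config = PostTrainingQuantConfig(
    approach="static",                      # static PTQ needs calibration data
    quant_format="QOperator",               # or "QDQ"
    calibration_sampling_size=[8, 16, 32],  # candidate calibration sample counts
)
q_model = quantization.fit(
    model,                        # onnx.ModelProto to quantize
    config,
    eval_func=eval_func,          # returns the GLUE metric for a candidate model
    calib_dataloader=dataloader,  # feeds calibration samples
)
q_model.save("path/to/model_tune.onnx")
```

The shell commands below perform the actual GLUE download.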
+```shell +export GLUE_DIR=path/to/glue_data +export TASK_NAME=MRPC + +bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME +``` + +## 3. Prepare Model + +Please refer to [Bert-GLUE_OnnxRuntime_quantization guide](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/quantization/notebooks/Bert-GLUE_OnnxRuntime_quantization.ipynb) for detailed model export. + +Run the `prepare_model.sh` script + + +Usage: +```shell +bash prepare_model.sh --input_dir=./MRPC \ + --task_name=$TASK_NAME \ + --output_model=path/to/model # model path as *.onnx +``` + +# Run + +## 1. Quantization + +Static quantization with QOperator format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/model_tune \ + --dataset_location=path/to/glue_data \ + --quant_format="QOperator" +``` + +Static quantization with QDQ format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/model_tune \ # model path as *.onnx + --dataset_location=path/to/glue_data \ + --quant_format="QDQ" +``` + +## 2. Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/glue_data \ + --batch_size=batch_size \ + --mode=performance # or accuracy +``` diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/export.py b/examples/onnxrt/nlp/bert/quantization/ptq_static/export.py new file mode 100644 index 00000000000..8211442bf0d --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_static/export.py @@ -0,0 +1,64 @@ +import argparse + +import torch +from transformers import BertForSequenceClassification + +def export_onnx_model(args, model, onnx_model_path): + with torch.no_grad(): + inputs = {'input_ids': torch.ones(1,args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1,args.max_len, dtype=torch.int64), + 'token_type_ids': torch.ones(1,args.max_len, dtype=torch.int64)} + outputs = model(**inputs) + + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + torch.onnx.export(model, # model being run + (inputs['input_ids'], + inputs['attention_mask'], + inputs['token_type_ids']), # model input (or a tuple for + # multiple inputs) + onnx_model_path, # where to save the model (can be a file + # or file-like object) + opset_version=11, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'input_mask', + 'segment_ids'], + output_names=['output'], # the model's output names + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'input_mask' : symbolic_names, + 'segment_ids' : symbolic_names}) + print("ONNX Model exported to {0}".format(onnx_model_path)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export bert onnx model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--input_dir', + type=str, + help='input_dir of bert model, must contain config.json') + parser.add_argument( + '--task_name', + type=str, + choices=["MRPC", "MNLI"], + help='tasks names of bert model') + parser.add_argument( + '--max_len', + type=int, + default=128, + help='Maximum length of the sentence pairs') + parser.add_argument( + '--do_lower_case', + type=bool, + default=True, + help='whether lower the tokenizer') + parser.add_argument( + '--output_model', + type=str, + default='bert.onnx', + help='path to exported model file') + args = 
parser.parse_args() + + model = BertForSequenceClassification.from_pretrained(args.input_dir) + export_onnx_model(args, model, args.output_model) diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/main.py b/examples/onnxrt/nlp/bert/quantization/ptq_static/main.py new file mode 100644 index 00000000000..ff3a85f2358 --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_static/main.py @@ -0,0 +1,422 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint:disable=redefined-outer-name,logging-format-interpolation + +import logging +import argparse +import os +import onnx +import onnxruntime +import transformers +import torch +import numpy as np +from dataclasses import dataclass +from typing import List, Optional, Union +from neural_compressor.data.dataloaders.onnxrt_dataloader import DefaultDataLoader + +logger = logging.getLogger(__name__) +logging.basicConfig(format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt = "%m/%d/%Y %H:%M:%S", + level = logging.WARN) + +class ONNXRTBertDataset: + """Dataset used for model Bert. + Args: data_dir (str): The input data dir. + model_name_or_path (str): Path to pre-trained student model or shortcut name, + selected in the list: + max_seq_length (int, default=128): The maximum length after tokenization. + Sequences longer than this will be truncated, + sequences shorter will be padded. + do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. + task (str, default=mrpc): The name of the task to fine-tune. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. + model_type (str, default="bert"): model type, support "distilbert", "bert", + "mobilebert", "roberta". + dynamic_length (bool, default=False): Whether to use fixed sequence length. + evaluate (bool, default=True): Whether do evaluation or training. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ + do_lower_case=True, task="mrpc", model_type="bert", dynamic_length=False,\ + evaluate=True, transform=None, filter=None): + self.inputs = [inp.name for inp in onnx.load(model).graph.input] + task = task.lower() + model_type = model_type.lower() + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + assert model_type in ["distilbert", "bert", "mobilebert", "roberta"], "Unsupported \ + model type" + self.dynamic_length = dynamic_length + self.model_type = model_type + self.max_seq_length = max_seq_length + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, + do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ + max_seq_length, task, model_type, tokenizer, evaluate) + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) + return batch[:len(self.inputs)], batch[-1] + +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ + model_type, tokenizer, evaluate): + from torch.utils.data import TensorDataset + + processor = transformers.glue_processors[task]() + output_mode = transformers.glue_output_modes[task] + # Load data features from cache or dataset file + if not os.path.exists("./dataset_cached"): + os.makedirs("./dataset_cached") + cached_features_file = os.path.join("./dataset_cached", "cached_{}_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, model_name_or_path.split("/"))).pop(), + str(max_seq_length), + str(task))) + if os.path.exists(cached_features_file): + logger.info("Load features from cached file {}.".format(cached_features_file)) + features = torch.load(cached_features_file) + else: + logger.info("Create features from dataset file at {}.".format(data_dir)) + label_list = processor.get_labels() + examples = processor.get_dev_examples(data_dir) if evaluate else \ + processor.get_train_examples(data_dir) + features = convert_examples_to_features(examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, + ) + logger.info("Save features into cached file {}.".format(cached_features_file)) + torch.save(features, cached_features_file) + # Convert to Tensors and build dataset + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) + all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) + all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) + if output_mode == "classification": + all_labels = torch.tensor([f.label for f in features], dtype=torch.long) + elif output_mode == "regression": + all_labels = torch.tensor([f.label for f in features], dtype=torch.float) + dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ + all_seq_lengths, all_labels) + return dataset + +def convert_examples_to_features( + examples, + tokenizer, + max_length=128, + task=None, + label_list=None, + output_mode="classification", + pad_token=0, + pad_token_segment_id=0, + mask_padding_with_zero=True, +): + processor = transformers.glue_processors[task]() + if label_list is None: + label_list = processor.get_labels() + logger.info("Use label 
list {} for task {}.".format(label_list, task)) + label_map = {label: i for i, label in enumerate(label_list)} + features = [] + for (ex_index, example) in enumerate(examples): + inputs = tokenizer.encode_plus( + example.text_a, + example.text_b, + add_special_tokens=True, + max_length=max_length, + return_token_type_ids=True, + truncation=True, + ) + input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) + + # Zero-pad up to the sequence length. + seq_length = len(input_ids) + padding_length = max_length - len(input_ids) + + input_ids = input_ids + ([pad_token] * padding_length) + attention_mask = attention_mask + \ + ([0 if mask_padding_with_zero else 1] * padding_length) + token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) + + assert len(input_ids) == max_length, \ + "Error with input_ids length {} vs {}".format( + len(input_ids), max_length) + assert len(attention_mask) == max_length, \ + "Error with attention_mask length {} vs {}".format( + len(attention_mask), max_length + ) + assert len(token_type_ids) == max_length, \ + "Error with token_type_ids length {} vs {}".format( + len(token_type_ids), max_length + ) + if output_mode == "classification": + label = label_map[example.label] + elif output_mode == "regression": + label = float(example.label) + else: + raise KeyError(output_mode) + + feats = InputFeatures( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + label=label, + seq_length=seq_length, + ) + features.append(feats) + return features + +@dataclass(frozen=True) +class InputFeatures: + """ + A single set of features of data. + Property names are the same names as the corresponding inputs to a model. + Args: + input_ids: Indices of input sequence tokens in the vocabulary. + attention_mask: Mask to avoid performing attention on padding token indices. + Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, + ``0`` for MASKED (padded) tokens. + token_type_ids: (Optional) Segment token indices to indicate first and second + portions of the inputs. Only some models use them. + label: (Optional) Label corresponding to the input. Int for classification problems, + float for regression problems. + seq_length: (Optional) The length of input sequence before padding. + """ + + input_ids: List[int] + attention_mask: Optional[List[int]] = None + token_type_ids: Optional[List[int]] = None + label: Optional[Union[int, float]] = None + seq_length: Optional[List[int]] = None + +class ONNXRTGLUE: + """Computes GLUE score. + + Args: + task (str, default=mrpc): The name of the task. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. 
+ + """ + def __init__(self, task="mrpc"): + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + self.pred_list = None + self.label_list = None + self.task = task + self.return_key = { + "cola": "mcc", + "mrpc": "acc", + "sts-b": "corr", + "qqp": "acc", + "mnli": "mnli/acc", + "qnli": "acc", + "rte": "acc", + "wnli": "acc", + "sst-2": "acc" + } + + def update(self, preds, labels): + """add preds and labels to storage""" + if isinstance(preds, list) and len(preds) == 1: + preds = preds[0] + if isinstance(labels, list) and len(labels) == 1: + labels = labels[0] + if self.pred_list is None: + self.pred_list = preds + self.label_list = labels + else: + self.pred_list = np.append(self.pred_list, preds, axis=0) + self.label_list = np.append(self.label_list, labels, axis=0) + + def reset(self): + """clear preds and labels storage""" + self.pred_list = None + self.label_list = None + + def result(self): + """calculate metric""" + output_mode = transformers.glue_output_modes[self.task] + + if output_mode == "classification": + processed_preds = np.argmax(self.pred_list, axis=1) + elif output_mode == "regression": + processed_preds = np.squeeze(self.pred_list) + result = transformers.glue_compute_metrics(\ + self.task, processed_preds, self.label_list) + return result[self.return_key[self.task]] + +if __name__ == "__main__": + logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") + parser = argparse.ArgumentParser( + description="BERT fine-tune examples for classification/regression tasks.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--model_path", + type=str, + help="Pre-trained model on onnx file" + ) + parser.add_argument( + "--benchmark", + action="store_true", \ + default=False + ) + parser.add_argument( + "--tune", + action="store_true", \ + default=False, + help="whether quantize the model" + ) + parser.add_argument( + "--output_model", + type=str, + help="output model path" + ) + parser.add_argument( + "--mode", + type=str, + help="benchmark mode of performance or accuracy" + ) + parser.add_argument( + "--model_name_or_path", + type=str, + help="pretrained model name or path" + ) + parser.add_argument( + "--data_path", + type=str, + help="input data path" + ) + parser.add_argument( + "--batch_size", + default=8, + type=int, + ) + parser.add_argument( + "--task", + type=str, + default="mrpc", + choices=["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], + help="GLUE task name" + ) + parser.add_argument( + "--quant_format", + type=str, + default="QOperator", + choices=["QDQ", "QOperator"], + help="quantization format" + ) + parser.add_argument( + "--dynamic_length", + type=bool, + default=False, + help="dynamic length" + ) + parser.add_argument( + "--max_seq_length", + type=int, + default=128, + help="max sequence length" + ) + parser.add_argument( + "--model_type", + type=str, + default="bert", + choices=["distilbert", "bert", "mobilebert", "roberta"], + help="model type" + ) + args = parser.parse_args() + + dataset = ONNXRTBertDataset(args.model_path, + data_dir=args.data_path, + model_name_or_path=args.model_name_or_path, + max_seq_length=args.max_seq_length, + task=args.task, + model_type=args.model_type, + dynamic_length=args.dynamic_length) + dataloader = DefaultDataLoader(dataset, args.batch_size) + metric = ONNXRTGLUE(args.task) + + def eval_func(model): + metric.reset() + session = 
onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + inputs = inputs[:len_inputs] + for i in range(len_inputs): + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + metric.update(predictions[0], labels) + return metric.result() + + if args.benchmark: + model = onnx.load(args.model_path) + if args.mode == "performance": + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == "accuracy": + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + if onnxruntime.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions("bert") + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + args.model_path, + "bert", + num_heads=12, + hidden_size=768, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(args.model_path) + + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach="static", + quant_format=args.quant_format, + calibration_sampling_size=[8, 16, 32], + recipes={"optypes_to_exclude_output_quant": ["MatMul", "Gemm", "Attention", "FusedGemm"]}) + q_model = quantization.fit(model, + config, + eval_func=eval_func, + calib_dataloader=dataloader) + q_model.save(args.output_model) diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_data.sh b/examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_data.sh new file mode 100644 index 00000000000..8e434a5c521 --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + download_data + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --data_dir=*) + data_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function download_data { + wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py + python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_model.sh b/examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_model.sh new file mode 100644 index 00000000000..33ae9a1fdde --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_model.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + export_model + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_dir=*) + input_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + --max_len=*) + max_len=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# 
run_tuning +function export_model { + curl https://download.pytorch.org/tutorial/MRPC.zip --output MPRC.zip + unzip -n MPRC.zip + python export.py --input_dir ${input_dir} --task_name ${task_name} --output_model ${output_model} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/bert/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..1fb753da72e --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_static/requirements.txt @@ -0,0 +1,7 @@ +torch +transformers +onnx +onnxruntime +coloredlogs +sympy +onnxruntime-extensions; python_version < '3.10' diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/nlp/bert/quantization/ptq_static/run_benchmark.sh new file mode 100644 index 00000000000..d71c0a908db --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [[ ${mode} == "accuracy" ]]; then + dynamic_length=False + elif [[ ${mode} == "performance" ]]; then + dynamic_length=True + else + echo "Error: No such mode: ${mode}" + exit 1 + fi + + model_name_or_path="bert-base-uncased" + task_name="mrpc" + model_type="bert" + + python main.py \ + --model_path ${input_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --mode ${mode} \ + --dynamic_length ${dynamic_length} \ + --benchmark + +} + +main "$@" + diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/nlp/bert/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..08821d98343 --- /dev/null +++ b/examples/onnxrt/nlp/bert/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,53 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + model_name_or_path="bert-base-uncased" + batch_size=8 + task_name="mrpc" + model_type="bert" + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --model_type ${model_type} \ + --quant_format ${quant_format} \ + --tune +} + +main "$@" + + + diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/README.md new file mode 100644 index 00000000000..a79afb67f9f --- /dev/null +++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/README.md @@ -0,0 +1,79 @@ +Step-by-Step +============ + +This example load a DistilBERT model and 
confirm its accuracy and speed based on [GLUE data](https://gluebenchmark.com/). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Dataset + +download the GLUE data with `prepare_data.sh` script. +```shell +export GLUE_DIR=path/to/glue_data +export TASK_NAME=MRPC + +bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME +``` + +## 3. Prepare Model + +Please refer to [Bert-GLUE_OnnxRuntime_quantization guide](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/quantization/notebooks/Bert-GLUE_OnnxRuntime_quantization.ipynb) for detailed model export. The following is a simple example. + +Use [Huggingface Transformers](https://github.com/huggingface/transformers/tree/v2.2.1) to fine-tune the model based on the [MRPC](https://github.com/huggingface/transformers/tree/master/examples/text-classification#mrpc) example with command like: +```shell +export OUT_DIR=/path/to/out_dir/ +python ./run_glue.py \ + --model_type distilbert \ + --model_name_or_path distilbert-base-uncased \ + --task_name $TASK_NAME \ + --do_train \ + --do_eval \ + --do_lower_case \ + --data_dir $GLUE_DIR/$TASK_NAME \ + --max_seq_length 128 \ + --per_gpu_eval_batch_size=8 \ + --per_gpu_train_batch_size=8 \ + --learning_rate 2e-5 \ + --num_train_epochs 3.0 \ + --save_steps 100000 \ + --output_dir $OUT_DIR +``` +Run the `prepare_model.sh` script + +Usage: +```shell +cd examples/onnxrt/language_translation/distilbert/ + +bash prepare_model.sh --input_dir=$OUT_DIR \ + --task_name=$TASK_NAME \ + --output_model=path/to/model # model path as *.onnx +``` + +# Run + +## 1. Quantization + +Dynamic quantization: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/model_tune \ # model path as *.onnx + --dataset_location=path/to/glue_data +``` + +## 2. 
Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/glue_data \ + --batch_size=batch_size \ + --mode=performance # or accuracy +``` diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/export.py b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/export.py new file mode 100644 index 00000000000..a3550163af9 --- /dev/null +++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/export.py @@ -0,0 +1,60 @@ +import argparse + +import torch +from transformers import DistilBertForSequenceClassification + +def export_onnx_model(args, model, onnx_model_path): + with torch.no_grad(): + inputs = {'input_ids': torch.ones(1,args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1,args.max_len, dtype=torch.int64)} + outputs = model(**inputs) + + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + torch.onnx.export(model, # model being run + (inputs['input_ids'], + inputs['attention_mask']), # model input (or a tuple for + # multiple inputs) + onnx_model_path, # where to save the model (can be a file + # or file-like object) + opset_version=11, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'input_mask'], + output_names=['output'], # the model's output names + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'input_mask' : symbolic_names}) + print("ONNX Model exported to {0}".format(onnx_model_path)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export bert onnx model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--input_dir', + type=str, + help='input_dir of bert model, must contain config.json') + parser.add_argument( + '--task_name', + type=str, + choices=["MRPC", "MNLI"], + help='tasks names of bert model') + parser.add_argument( + '--max_len', + type=int, + default=128, + help='Maximum length of the sentence pairs') + parser.add_argument( + '--do_lower_case', + type=bool, + default=True, + help='whether lower the tokenizer') + parser.add_argument( + '--output_model', + type=str, + default='bert.onnx', + help='path to exported model file') + args = parser.parse_args() + + model = DistilBertForSequenceClassification.from_pretrained(args.input_dir) + export_onnx_model(args, model, args.output_model) diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/main.py new file mode 100644 index 00000000000..284858fc073 --- /dev/null +++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/main.py @@ -0,0 +1,404 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
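At its core, the dynamic quantization flow implemented by this `main.py` (and by the other dynamic-PTQ examples in this patch) is a short 2.x API sequence; the GLUE dataset, dataloader, and metric code around it exist only to provide the accuracy callback. A minimal sketch, assuming `model_path` points to the exported ONNX file and `eval_func` stands for the GLUE evaluation callback the script defines:

```python
# Minimal sketch of the dynamic PTQ path in this example (2.x API).
# `eval_func` is a placeholder for the GLUE accuracy callback defined in main.py.
import onnx
from neural_compressor import PostTrainingQuantConfig, quantization

model_path = "path/to/model.onnx"                     # exported FP32 model
model = onnx.load(model_path)
config = PostTrainingQuantConfig(approach="dynamic")  # no calibration data needed
q_model = quantization.fit(model, config, eval_func=eval_func)
q_model.save("path/to/model_tune.onnx")
```

Because dynamic quantization computes activation quantization parameters at runtime, no calibration dataloader is passed here, in contrast to the static examples above.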
+# pylint:disable=redefined-outer-name,logging-format-interpolation + +import logging +import argparse +import os +import onnx +import onnxruntime +import transformers +import torch +import numpy as np +from dataclasses import dataclass +from typing import List, Optional, Union +from neural_compressor.data.dataloaders.onnxrt_dataloader import DefaultDataLoader + +logger = logging.getLogger(__name__) +logging.basicConfig(format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt = "%m/%d/%Y %H:%M:%S", + level = logging.WARN) + +class ONNXRTBertDataset: + """Dataset used for model Bert. + Args: data_dir (str): The input data dir. + model_name_or_path (str): Path to pre-trained student model or shortcut name, + selected in the list: + max_seq_length (int, default=128): The maximum length after tokenization. + Sequences longer than this will be truncated, + sequences shorter will be padded. + do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. + task (str, default=mrpc): The name of the task to fine-tune. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. + model_type (str, default="bert"): model type, support "distilbert", "bert", + "mobilebert", "roberta". + dynamic_length (bool, default=False): Whether to use fixed sequence length. + evaluate (bool, default=True): Whether do evaluation or training. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ + do_lower_case=True, task="mrpc", model_type="bert", dynamic_length=False,\ + evaluate=True, transform=None, filter=None): + self.inputs = [inp.name for inp in onnx.load(model).graph.input] + task = task.lower() + model_type = model_type.lower() + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + assert model_type in ["distilbert", "bert", "mobilebert", "roberta"], "Unsupported \ + model type" + self.dynamic_length = dynamic_length + self.model_type = model_type + self.max_seq_length = max_seq_length + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, + do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ + max_seq_length, task, model_type, tokenizer, evaluate) + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) + return batch[:len(self.inputs)], batch[-1] + +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ + model_type, tokenizer, evaluate): + from torch.utils.data import TensorDataset + + processor = transformers.glue_processors[task]() + output_mode = transformers.glue_output_modes[task] + # Load data features from cache or dataset file + if not os.path.exists("./dataset_cached"): + os.makedirs("./dataset_cached") + cached_features_file = os.path.join("./dataset_cached", "cached_{}_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, model_name_or_path.split("/"))).pop(), + str(max_seq_length), + str(task))) + if os.path.exists(cached_features_file): + logger.info("Load features from cached file {}.".format(cached_features_file)) + features = torch.load(cached_features_file) + else: + logger.info("Create features from dataset file at 
{}.".format(data_dir)) + label_list = processor.get_labels() + examples = processor.get_dev_examples(data_dir) if evaluate else \ + processor.get_train_examples(data_dir) + features = convert_examples_to_features(examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, + ) + logger.info("Save features into cached file {}.".format(cached_features_file)) + torch.save(features, cached_features_file) + # Convert to Tensors and build dataset + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) + all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) + all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) + if output_mode == "classification": + all_labels = torch.tensor([f.label for f in features], dtype=torch.long) + elif output_mode == "regression": + all_labels = torch.tensor([f.label for f in features], dtype=torch.float) + dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ + all_seq_lengths, all_labels) + return dataset + +def convert_examples_to_features( + examples, + tokenizer, + max_length=128, + task=None, + label_list=None, + output_mode="classification", + pad_token=0, + pad_token_segment_id=0, + mask_padding_with_zero=True, +): + processor = transformers.glue_processors[task]() + if label_list is None: + label_list = processor.get_labels() + logger.info("Use label list {} for task {}.".format(label_list, task)) + label_map = {label: i for i, label in enumerate(label_list)} + features = [] + for (ex_index, example) in enumerate(examples): + inputs = tokenizer.encode_plus( + example.text_a, + example.text_b, + add_special_tokens=True, + max_length=max_length, + return_token_type_ids=True, + truncation=True, + ) + input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) + + # Zero-pad up to the sequence length. + seq_length = len(input_ids) + padding_length = max_length - len(input_ids) + + input_ids = input_ids + ([pad_token] * padding_length) + attention_mask = attention_mask + \ + ([0 if mask_padding_with_zero else 1] * padding_length) + token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) + + assert len(input_ids) == max_length, \ + "Error with input_ids length {} vs {}".format( + len(input_ids), max_length) + assert len(attention_mask) == max_length, \ + "Error with attention_mask length {} vs {}".format( + len(attention_mask), max_length + ) + assert len(token_type_ids) == max_length, \ + "Error with token_type_ids length {} vs {}".format( + len(token_type_ids), max_length + ) + if output_mode == "classification": + label = label_map[example.label] + elif output_mode == "regression": + label = float(example.label) + else: + raise KeyError(output_mode) + + feats = InputFeatures( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + label=label, + seq_length=seq_length, + ) + features.append(feats) + return features + +@dataclass(frozen=True) +class InputFeatures: + """ + A single set of features of data. + Property names are the same names as the corresponding inputs to a model. + Args: + input_ids: Indices of input sequence tokens in the vocabulary. 
+ attention_mask: Mask to avoid performing attention on padding token indices. + Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, + ``0`` for MASKED (padded) tokens. + token_type_ids: (Optional) Segment token indices to indicate first and second + portions of the inputs. Only some models use them. + label: (Optional) Label corresponding to the input. Int for classification problems, + float for regression problems. + seq_length: (Optional) The length of input sequence before padding. + """ + + input_ids: List[int] + attention_mask: Optional[List[int]] = None + token_type_ids: Optional[List[int]] = None + label: Optional[Union[int, float]] = None + seq_length: Optional[List[int]] = None + +class ONNXRTGLUE: + """Computes GLUE score. + + Args: + task (str, default=mrpc): The name of the task. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. + + """ + def __init__(self, task="mrpc"): + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + self.pred_list = None + self.label_list = None + self.task = task + self.return_key = { + "cola": "mcc", + "mrpc": "acc", + "sts-b": "corr", + "qqp": "acc", + "mnli": "mnli/acc", + "qnli": "acc", + "rte": "acc", + "wnli": "acc", + "sst-2": "acc" + } + + def update(self, preds, labels): + """add preds and labels to storage""" + if isinstance(preds, list) and len(preds) == 1: + preds = preds[0] + if isinstance(labels, list) and len(labels) == 1: + labels = labels[0] + if self.pred_list is None: + self.pred_list = preds + self.label_list = labels + else: + self.pred_list = np.append(self.pred_list, preds, axis=0) + self.label_list = np.append(self.label_list, labels, axis=0) + + def reset(self): + """clear preds and labels storage""" + self.pred_list = None + self.label_list = None + + def result(self): + """calculate metric""" + output_mode = transformers.glue_output_modes[self.task] + + if output_mode == "classification": + processed_preds = np.argmax(self.pred_list, axis=1) + elif output_mode == "regression": + processed_preds = np.squeeze(self.pred_list) + result = transformers.glue_compute_metrics(\ + self.task, processed_preds, self.label_list) + return result[self.return_key[self.task]] + +if __name__ == "__main__": + logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") + parser = argparse.ArgumentParser( + description="BERT fine-tune examples for classification/regression tasks.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--model_path", + type=str, + help="Pre-trained model on onnx file" + ) + parser.add_argument( + "--benchmark", + action="store_true", \ + default=False + ) + parser.add_argument( + "--tune", + action="store_true", \ + default=False, + help="whether quantize the model" + ) + parser.add_argument( + "--output_model", + type=str, + help="output model path" + ) + parser.add_argument( + "--mode", + type=str, + help="benchmark mode of performance or accuracy" + ) + parser.add_argument( + "--model_name_or_path", + type=str, + help="pretrained model name or path" + ) + parser.add_argument( + "--data_path", + type=str, + help="input data path" + ) + parser.add_argument( + "--batch_size", + default=8, + type=int, + ) + parser.add_argument( + "--task", + type=str, + default="mrpc", + choices=["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], + help="GLUE task name" + ) + parser.add_argument( + "--dynamic_length", + type=bool, + 
default=False, + help="dynamic length" + ) + parser.add_argument( + "--model_type", + type=str, + default="bert", + choices=["distilbert", "bert", "mobilebert", "roberta"], + help="model type" + ) + args = parser.parse_args() + + dataset = ONNXRTBertDataset(args.model_path, + data_dir=args.data_path, + model_name_or_path=args.model_name_or_path, + task=args.task, + model_type=args.model_type, + dynamic_length=args.dynamic_length) + dataloader = DefaultDataLoader(dataset, args.batch_size) + metric = ONNXRTGLUE(args.task) + + def eval_func(model): + metric.reset() + session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + inputs = inputs[:len_inputs] + for i in range(len_inputs): + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + metric.update(predictions[0], labels) + return metric.result() + + if args.benchmark: + model = onnx.load(args.model_path) + if args.mode == "performance": + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == "accuracy": + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + if onnxruntime.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions("bert") + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + args.model_path, + "bert", + num_heads=12, + hidden_size=768, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(args.model_path) + + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach="dynamic") + q_model = quantization.fit(model, + config, + eval_func=eval_func) + q_model.save(args.output_model) diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_data.sh b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_data.sh new file mode 100644 index 00000000000..8e434a5c521 --- /dev/null +++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + download_data + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --data_dir=*) + data_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function download_data { + wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py + python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_model.sh b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_model.sh new file mode 100644 index 00000000000..8d6eb064930 --- /dev/null +++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_model.sh @@ -0,0 +1,39 @@ 
+#!/bin/bash
+set -x
+
+function main {
+  init_params "$@"
+  export_model
+
+}
+
+# init params
+function init_params {
+
+  for var in "$@"
+  do
+    case $var in
+      --input_dir=*)
+          input_dir=$(echo $var |cut -f2 -d=)
+      ;;
+      --task_name=*)
+          task_name=$(echo $var |cut -f2 -d=)
+      ;;
+      --max_len=*)
+          max_len=$(echo $var |cut -f2 -d=)
+      ;;
+      --output_model=*)
+          output_model=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_tuning
+function export_model {
+    python export.py --input_dir ${input_dir} --task_name ${task_name} --output_model ${output_model}
+}
+
+main "$@"
+
diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/readme.md b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/readme.md
new file mode 100644
index 00000000000..b90ff6429b8
--- /dev/null
+++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/readme.md
@@ -0,0 +1,69 @@
+# Evaluate performance of ONNX Runtime (DistilBERT)
+>ONNX Runtime quantization is under active development. Please use 1.6.0+ to get more quantization support.
+
+This example loads a DistilBERT model and confirms its accuracy and speed based on [GLUE data](https://gluebenchmark.com/).
+
+### Environment
+onnx: 1.7.0
+onnxruntime: 1.6.0+
+
+### Prepare dataset
+Download the GLUE data with the `prepare_data.sh` script.
+```shell
+export GLUE_DIR=/path/to/glue_data
+export TASK_NAME=MRPC
+
+bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME
+```
+
+### Prepare model
+Please refer to the [Bert-GLUE_OnnxRuntime_quantization guide](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/quantization/notebooks/Bert-GLUE_OnnxRuntime_quantization.ipynb) for detailed model export. The following is a simple example.
+
+Use [Huggingface Transformers](https://github.com/huggingface/transformers/tree/v2.2.1) to fine-tune the model based on the [MRPC](https://github.com/huggingface/transformers/tree/master/examples/text-classification#mrpc) example with a command like:
+```shell
+export OUT_DIR=/path/to/out_dir/
+python ./run_glue.py \
+    --model_type distilbert \
+    --model_name_or_path distilbert-base-uncased \
+    --task_name $TASK_NAME \
+    --do_train \
+    --do_eval \
+    --do_lower_case \
+    --data_dir $GLUE_DIR/$TASK_NAME \
+    --max_seq_length 128 \
+    --per_gpu_eval_batch_size=8 \
+    --per_gpu_train_batch_size=8 \
+    --learning_rate 2e-5 \
+    --num_train_epochs 3.0 \
+    --save_steps 100000 \
+    --output_dir $OUT_DIR
+```
+Then run the `prepare_model.sh` script to export the fine-tuned model to ONNX.
+
+Usage:
+```shell
+cd examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/
+
+bash prepare_model.sh --input_dir=$OUT_DIR \
+                      --task_name=$TASK_NAME \
+                      --output_model=path/to/model # model path as *.onnx
+```
+
+### Quantization
+
+Quantize the model with dynamic quantization:
+
+```bash
+bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx
+                   --output_model=path/to/model_tune \
+                   --dataset_location=path/to/glue/data
+```
+
+### Benchmark
+
+```bash
+bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx
+                      --dataset_location=path/to/glue/data \
+                      --batch_size=batch_size \
+                      --mode=performance # or accuracy
+```
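For context on what `run_tuning.sh` drives: `main.py` in this example calls the Neural Compressor 2.x post-training quantization API. The following is a minimal sketch of that flow; the model paths and the body of `eval_func` are placeholders, and the real script builds `eval_func` from the `ONNXRTGLUE` metric and the dataloader defined in `main.py`.

```python
import onnx
from neural_compressor import quantization, PostTrainingQuantConfig

# Placeholder path; run_tuning.sh passes the exported FP32 model here.
model = onnx.load("path/to/model.onnx")

def eval_func(model):
    # Placeholder: main.py evaluates the candidate model on the GLUE task
    # with ONNXRTGLUE + a dataloader and returns a single accuracy value.
    return 1.0

# Dynamic post-training quantization: no calibration dataloader is required.
config = PostTrainingQuantConfig(approach="dynamic")
q_model = quantization.fit(model, config, eval_func=eval_func)
q_model.save("path/to/model_tune.onnx")  # placeholder output path
```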
diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/requirements.txt b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/requirements.txt
new file mode 100644
index 00000000000..90affea18d9
--- /dev/null
+++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/requirements.txt
@@ -0,0 +1,8 @@
+torch
+transformers
+onnx
+onnxruntime
+coloredlogs
+sympy
+onnxruntime-extensions; python_version < '3.10'
+tokenizers>=0.12.0
diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_benchmark.sh b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_benchmark.sh
new file mode 100644
index 00000000000..f358d9a59d5
--- /dev/null
+++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_benchmark.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+set -x
+
+function main {
+
+  init_params "$@"
+  run_benchmark
+
+}
+
+# init params
+function init_params {
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --mode=*)
+          mode=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      --batch_size=*)
+          batch_size=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_benchmark
+function run_benchmark {
+  if [[ ${mode} == "accuracy" ]]; then
+    dynamic_length=False
+  elif [[ ${mode} == "performance" ]]; then
+    dynamic_length=True
+  else
+    echo "Error: No such mode: ${mode}"
+    exit 1
+  fi
+
+  model_name_or_path="distilbert-base-uncased"
+  task_name="mrpc"
+  model_type="distilbert"
+
+  python main.py \
+         --model_path ${input_model} \
+         --model_name_or_path ${model_name_or_path} \
+         --data_path ${dataset_location} \
+         --task ${task_name} \
+         --batch_size ${batch_size} \
+         --model_type ${model_type} \
+         --mode ${mode} \
+         --dynamic_length ${dynamic_length} \
+         --benchmark
+
+}
+
+main "$@"
+
diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_tuning.sh b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_tuning.sh
new file mode 100644
index 00000000000..3da16aaf9bd
--- /dev/null
+++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_tuning.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+set -x
+
+function main {
+  init_params "$@"
+  run_tuning
+}
+
+# init params
+function init_params {
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --output_model=*)
+          output_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+    esac
+  done
+
+}
+
+# run_tuning
+function run_tuning {
+  model_name_or_path="distilbert-base-uncased"
+  batch_size=8
+  task_name="mrpc"
+  model_type="distilbert"
+
+  python main.py \
+         --model_path ${input_model} \
+         --output_model ${output_model} \
+         --model_name_or_path ${model_name_or_path} \
+         --data_path ${dataset_location} \
+         --task ${task_name} \
+         --batch_size ${batch_size} \
+         --model_type ${model_type} \
+         --tune
+}
+
+main "$@"
+
+
+
diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/README.md b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/README.md
new file mode 100644
index 00000000000..b1cb085f260
--- /dev/null
+++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/README.md
@@ -0,0 +1,80 @@
+Step-by-Step
+============
+
+This example loads a DistilBERT model and confirms its accuracy and speed based on [GLUE data](https://gluebenchmark.com/).
+
+# Prerequisite
+
+## 1. Environment
+
+```shell
+pip install neural-compressor
+pip install -r requirements.txt
+```
+> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+## 2. Prepare Dataset
+
+Download the GLUE data with the `prepare_data.sh` script.
+```shell
+export GLUE_DIR=path/to/glue_data
+export TASK_NAME=MRPC
+
+bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME
+```
+
+## 3. 
Prepare Model + +Please refer to [Bert-GLUE_OnnxRuntime_quantization guide](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/quantization/notebooks/Bert-GLUE_OnnxRuntime_quantization.ipynb) for detailed model export. The following is a simple example. + +Use [Huggingface Transformers](https://github.com/huggingface/transformers/tree/v2.2.1) to fine-tune the model based on the [MRPC](https://github.com/huggingface/transformers/tree/master/examples/text-classification#mrpc) example with command like: +```shell +export OUT_DIR=/path/to/out_dir/ +python ./run_glue.py \ + --model_type distilbert \ + --model_name_or_path distilbert-base-uncased \ + --task_name $TASK_NAME \ + --do_train \ + --do_eval \ + --do_lower_case \ + --data_dir $GLUE_DIR/$TASK_NAME \ + --max_seq_length 128 \ + --per_gpu_eval_batch_size=8 \ + --per_gpu_train_batch_size=8 \ + --learning_rate 2e-5 \ + --num_train_epochs 3.0 \ + --save_steps 100000 \ + --output_dir $OUT_DIR +``` +Run the `prepare_model.sh` script + +Usage: +```shell +cd examples/onnxrt/language_translation/distilbert/ + +bash prepare_model.sh --input_dir=$OUT_DIR \ + --task_name=$TASK_NAME \ + --output_model=path/to/model # model path as *.onnx +``` + +# Run + +## 1. Quantization + +Static quantization with QDQ format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/model_tune \ # model path as *.onnx + --dataset_location=path/to/glue_data \ + --quant_format="QDQ" +``` + +## 2. Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/glue_data \ + --batch_size=batch_size \ + --mode=performance # or accuracy +``` diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/export.py b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/export.py new file mode 100644 index 00000000000..a3550163af9 --- /dev/null +++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/export.py @@ -0,0 +1,60 @@ +import argparse + +import torch +from transformers import DistilBertForSequenceClassification + +def export_onnx_model(args, model, onnx_model_path): + with torch.no_grad(): + inputs = {'input_ids': torch.ones(1,args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1,args.max_len, dtype=torch.int64)} + outputs = model(**inputs) + + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + torch.onnx.export(model, # model being run + (inputs['input_ids'], + inputs['attention_mask']), # model input (or a tuple for + # multiple inputs) + onnx_model_path, # where to save the model (can be a file + # or file-like object) + opset_version=11, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'input_mask'], + output_names=['output'], # the model's output names + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'input_mask' : symbolic_names}) + print("ONNX Model exported to {0}".format(onnx_model_path)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export bert onnx model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--input_dir', + type=str, + help='input_dir of bert model, must contain config.json') + parser.add_argument( + '--task_name', + type=str, + choices=["MRPC", "MNLI"], + help='tasks names of bert model') + parser.add_argument( + '--max_len', + type=int, + default=128, + help='Maximum length 
of the sentence pairs') + parser.add_argument( + '--do_lower_case', + type=bool, + default=True, + help='whether lower the tokenizer') + parser.add_argument( + '--output_model', + type=str, + default='bert.onnx', + help='path to exported model file') + args = parser.parse_args() + + model = DistilBertForSequenceClassification.from_pretrained(args.input_dir) + export_onnx_model(args, model, args.output_model) diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/main.py b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/main.py new file mode 100644 index 00000000000..05736a9141c --- /dev/null +++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/main.py @@ -0,0 +1,414 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint:disable=redefined-outer-name,logging-format-interpolation + +import logging +import argparse +import os +import onnx +import onnxruntime +import transformers +import torch +import numpy as np +from dataclasses import dataclass +from typing import List, Optional, Union +from neural_compressor.data.dataloaders.onnxrt_dataloader import DefaultDataLoader + +logger = logging.getLogger(__name__) +logging.basicConfig(format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt = "%m/%d/%Y %H:%M:%S", + level = logging.WARN) + +class ONNXRTBertDataset: + """Dataset used for model Bert. + Args: data_dir (str): The input data dir. + model_name_or_path (str): Path to pre-trained student model or shortcut name, + selected in the list: + max_seq_length (int, default=128): The maximum length after tokenization. + Sequences longer than this will be truncated, + sequences shorter will be padded. + do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. + task (str, default=mrpc): The name of the task to fine-tune. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. + model_type (str, default="bert"): model type, support "distilbert", "bert", + "mobilebert", "roberta". + dynamic_length (bool, default=False): Whether to use fixed sequence length. + evaluate (bool, default=True): Whether do evaluation or training. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ + do_lower_case=True, task="mrpc", model_type="bert", dynamic_length=False,\ + evaluate=True, transform=None, filter=None): + self.inputs = [inp.name for inp in onnx.load(model).graph.input] + task = task.lower() + model_type = model_type.lower() + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + assert model_type in ["distilbert", "bert", "mobilebert", "roberta"], "Unsupported \ + model type" + self.dynamic_length = dynamic_length + self.model_type = model_type + self.max_seq_length = max_seq_length + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, + do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ + max_seq_length, task, model_type, tokenizer, evaluate) + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) + return batch[:len(self.inputs)], batch[-1] + +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ + model_type, tokenizer, evaluate): + from torch.utils.data import TensorDataset + + processor = transformers.glue_processors[task]() + output_mode = transformers.glue_output_modes[task] + # Load data features from cache or dataset file + if not os.path.exists("./dataset_cached"): + os.makedirs("./dataset_cached") + cached_features_file = os.path.join("./dataset_cached", "cached_{}_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, model_name_or_path.split("/"))).pop(), + str(max_seq_length), + str(task))) + if os.path.exists(cached_features_file): + logger.info("Load features from cached file {}.".format(cached_features_file)) + features = torch.load(cached_features_file) + else: + logger.info("Create features from dataset file at {}.".format(data_dir)) + label_list = processor.get_labels() + examples = processor.get_dev_examples(data_dir) if evaluate else \ + processor.get_train_examples(data_dir) + features = convert_examples_to_features(examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, + ) + logger.info("Save features into cached file {}.".format(cached_features_file)) + torch.save(features, cached_features_file) + # Convert to Tensors and build dataset + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) + all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) + all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) + if output_mode == "classification": + all_labels = torch.tensor([f.label for f in features], dtype=torch.long) + elif output_mode == "regression": + all_labels = torch.tensor([f.label for f in features], dtype=torch.float) + dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ + all_seq_lengths, all_labels) + return dataset + +def convert_examples_to_features( + examples, + tokenizer, + max_length=128, + task=None, + label_list=None, + output_mode="classification", + pad_token=0, + pad_token_segment_id=0, + mask_padding_with_zero=True, +): + processor = transformers.glue_processors[task]() + if label_list is None: + label_list = processor.get_labels() + logger.info("Use label 
list {} for task {}.".format(label_list, task)) + label_map = {label: i for i, label in enumerate(label_list)} + features = [] + for (ex_index, example) in enumerate(examples): + inputs = tokenizer.encode_plus( + example.text_a, + example.text_b, + add_special_tokens=True, + max_length=max_length, + return_token_type_ids=True, + truncation=True, + ) + input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) + + # Zero-pad up to the sequence length. + seq_length = len(input_ids) + padding_length = max_length - len(input_ids) + + input_ids = input_ids + ([pad_token] * padding_length) + attention_mask = attention_mask + \ + ([0 if mask_padding_with_zero else 1] * padding_length) + token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) + + assert len(input_ids) == max_length, \ + "Error with input_ids length {} vs {}".format( + len(input_ids), max_length) + assert len(attention_mask) == max_length, \ + "Error with attention_mask length {} vs {}".format( + len(attention_mask), max_length + ) + assert len(token_type_ids) == max_length, \ + "Error with token_type_ids length {} vs {}".format( + len(token_type_ids), max_length + ) + if output_mode == "classification": + label = label_map[example.label] + elif output_mode == "regression": + label = float(example.label) + else: + raise KeyError(output_mode) + + feats = InputFeatures( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + label=label, + seq_length=seq_length, + ) + features.append(feats) + return features + +@dataclass(frozen=True) +class InputFeatures: + """ + A single set of features of data. + Property names are the same names as the corresponding inputs to a model. + Args: + input_ids: Indices of input sequence tokens in the vocabulary. + attention_mask: Mask to avoid performing attention on padding token indices. + Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, + ``0`` for MASKED (padded) tokens. + token_type_ids: (Optional) Segment token indices to indicate first and second + portions of the inputs. Only some models use them. + label: (Optional) Label corresponding to the input. Int for classification problems, + float for regression problems. + seq_length: (Optional) The length of input sequence before padding. + """ + + input_ids: List[int] + attention_mask: Optional[List[int]] = None + token_type_ids: Optional[List[int]] = None + label: Optional[Union[int, float]] = None + seq_length: Optional[List[int]] = None + +class ONNXRTGLUE: + """Computes GLUE score. + + Args: + task (str, default=mrpc): The name of the task. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. 
+ + """ + def __init__(self, task="mrpc"): + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + self.pred_list = None + self.label_list = None + self.task = task + self.return_key = { + "cola": "mcc", + "mrpc": "acc", + "sts-b": "corr", + "qqp": "acc", + "mnli": "mnli/acc", + "qnli": "acc", + "rte": "acc", + "wnli": "acc", + "sst-2": "acc" + } + + def update(self, preds, labels): + """add preds and labels to storage""" + if isinstance(preds, list) and len(preds) == 1: + preds = preds[0] + if isinstance(labels, list) and len(labels) == 1: + labels = labels[0] + if self.pred_list is None: + self.pred_list = preds + self.label_list = labels + else: + self.pred_list = np.append(self.pred_list, preds, axis=0) + self.label_list = np.append(self.label_list, labels, axis=0) + + def reset(self): + """clear preds and labels storage""" + self.pred_list = None + self.label_list = None + + def result(self): + """calculate metric""" + output_mode = transformers.glue_output_modes[self.task] + + if output_mode == "classification": + processed_preds = np.argmax(self.pred_list, axis=1) + elif output_mode == "regression": + processed_preds = np.squeeze(self.pred_list) + result = transformers.glue_compute_metrics(\ + self.task, processed_preds, self.label_list) + return result[self.return_key[self.task]] + +if __name__ == "__main__": + logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") + parser = argparse.ArgumentParser( + description="BERT fine-tune examples for classification/regression tasks.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--model_path", + type=str, + help="Pre-trained model on onnx file" + ) + parser.add_argument( + "--benchmark", + action="store_true", \ + default=False + ) + parser.add_argument( + "--tune", + action="store_true", \ + default=False, + help="whether quantize the model" + ) + parser.add_argument( + "--output_model", + type=str, + help="output model path" + ) + parser.add_argument( + "--mode", + type=str, + help="benchmark mode of performance or accuracy" + ) + parser.add_argument( + "--model_name_or_path", + type=str, + help="pretrained model name or path" + ) + parser.add_argument( + "--data_path", + type=str, + help="input data path" + ) + parser.add_argument( + "--batch_size", + default=8, + type=int, + ) + parser.add_argument( + "--task", + type=str, + default="mrpc", + choices=["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], + help="GLUE task name" + ) + parser.add_argument( + "--quant_format", + type=str, + default="QOperator", + choices=["QDQ", "QOperator"], + help="quantization format" + ) + parser.add_argument( + "--dynamic_length", + type=bool, + default=False, + help="dynamic length" + ) + parser.add_argument( + "--model_type", + type=str, + default="bert", + choices=["distilbert", "bert", "mobilebert", "roberta"], + help="model type" + ) + args = parser.parse_args() + + dataset = ONNXRTBertDataset(args.model_path, + data_dir=args.data_path, + model_name_or_path=args.model_name_or_path, + task=args.task, + model_type=args.model_type, + dynamic_length=args.dynamic_length) + dataloader = DefaultDataLoader(dataset, args.batch_size) + metric = ONNXRTGLUE(args.task) + + def eval_func(model): + metric.reset() + session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = 
[session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + inputs = inputs[:len_inputs] + for i in range(len_inputs): + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + metric.update(predictions[0], labels) + return metric.result() + + if args.benchmark: + model = onnx.load(args.model_path) + if args.mode == "performance": + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == "accuracy": + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + if onnxruntime.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions("bert") + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + args.model_path, + "bert", + num_heads=12, + hidden_size=768, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(args.model_path) + + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach="static", + quant_format=args.quant_format, + recipes={"optypes_to_exclude_output_quant": ["MatMul", "Gemm", "Attention", "FusedGemm"]}) + q_model = quantization.fit(model, + config, + eval_func=eval_func, + calib_dataloader=dataloader) + q_model.save(args.output_model) diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_data.sh b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_data.sh new file mode 100644 index 00000000000..8e434a5c521 --- /dev/null +++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + download_data + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --data_dir=*) + data_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function download_data { + wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py + python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_model.sh b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_model.sh new file mode 100644 index 00000000000..8d6eb064930 --- /dev/null +++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_model.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + export_model + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_dir=*) + input_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + --max_len=*) + max_len=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function export_model { + python export.py --input_dir ${input_dir} --task_name ${task_name} --output_model ${output_model} +} + +main "$@" + diff --git 
a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/readme.md b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/readme.md
new file mode 100644
index 00000000000..254b28b2962
--- /dev/null
+++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/readme.md
@@ -0,0 +1,70 @@
+# Evaluate performance of ONNX Runtime (DistilBERT)
+>ONNX Runtime quantization is under active development. Please use 1.6.0+ to get more quantization support.
+
+This example loads a DistilBERT model and confirms its accuracy and speed based on [GLUE data](https://gluebenchmark.com/).
+
+### Environment
+onnx: 1.7.0
+onnxruntime: 1.6.0+
+
+### Prepare dataset
+Download the GLUE data with the `prepare_data.sh` script.
+```shell
+export GLUE_DIR=/path/to/glue_data
+export TASK_NAME=MRPC
+
+bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME
+```
+
+### Prepare model
+Please refer to the [Bert-GLUE_OnnxRuntime_quantization guide](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/quantization/notebooks/Bert-GLUE_OnnxRuntime_quantization.ipynb) for detailed model export. The following is a simple example.
+
+Use [Huggingface Transformers](https://github.com/huggingface/transformers/tree/v2.2.1) to fine-tune the model based on the [MRPC](https://github.com/huggingface/transformers/tree/master/examples/text-classification#mrpc) example with a command like:
+```shell
+export OUT_DIR=/path/to/out_dir/
+python ./run_glue.py \
+    --model_type distilbert \
+    --model_name_or_path distilbert-base-uncased \
+    --task_name $TASK_NAME \
+    --do_train \
+    --do_eval \
+    --do_lower_case \
+    --data_dir $GLUE_DIR/$TASK_NAME \
+    --max_seq_length 128 \
+    --per_gpu_eval_batch_size=8 \
+    --per_gpu_train_batch_size=8 \
+    --learning_rate 2e-5 \
+    --num_train_epochs 3.0 \
+    --save_steps 100000 \
+    --output_dir $OUT_DIR
+```
+Then run the `prepare_model.sh` script to export the fine-tuned model to ONNX.
+
+Usage:
+```shell
+cd examples/onnxrt/nlp/distilbert/quantization/ptq_static/
+
+bash prepare_model.sh --input_dir=$OUT_DIR \
+                      --task_name=$TASK_NAME \
+                      --output_model=path/to/model # model path as *.onnx
+```
+
+### Quantization
+
+Static quantization with QDQ format:
+
+```bash
+bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx
+                   --output_model=path/to/model_tune \
+                   --dataset_location=path/to/glue/data \
+                   --quant_format="QDQ"
+```
+
+### Benchmark
+
+```bash
+bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx
+                      --dataset_location=path/to/glue/data \
+                      --batch_size=batch_size \
+                      --mode=performance # or accuracy
+```
diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/requirements.txt
new file mode 100644
index 00000000000..90affea18d9
--- /dev/null
+++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/requirements.txt
@@ -0,0 +1,8 @@
+torch
+transformers
+onnx
+onnxruntime
+coloredlogs
+sympy
+onnxruntime-extensions; python_version < '3.10'
+tokenizers>=0.12.0
diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_benchmark.sh
new file mode 100644
index 00000000000..f358d9a59d5
--- /dev/null
+++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_benchmark.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+set -x
+
+function main {
+
+  init_params "$@"
+  run_benchmark
+
+}
+
+# init params
+function init_params {
+  for var in "$@"
+  do
+    case $var in
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --mode=*)
+          mode=$(echo $var |cut 
-f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [[ ${mode} == "accuracy" ]]; then + dynamic_length=False + elif [[ ${mode} == "performance" ]]; then + dynamic_length=True + else + echo "Error: No such mode: ${mode}" + exit 1 + fi + + model_name_or_path="distilbert-base-uncased" + task_name="mrpc" + model_type="distilbert" + + python main.py \ + --model_path ${input_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --model_type ${model_type} \ + --mode ${mode} \ + --dynamic_length ${dynamic_length} \ + --benchmark + +} + +main "$@" + diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..4cea1ec6647 --- /dev/null +++ b/examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,53 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + model_name_or_path="distilbert-base-uncased" + batch_size=8 + task_name="mrpc" + model_type="distilbert" + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --quant_format ${quant_format} \ + --model_type ${model_type} \ + --tune +} + +main "$@" + + + diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/README.md index d0b66ed5f3c..3a87b222aff 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/README.md +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/README.md @@ -15,10 +15,13 @@ pip install -r requirements.txt ## 2. Prepare Model Supported model identifier from [huggingface.co](https://huggingface.co/): -| Model Identifier | -|:-----------------------------------------------:| -| mrm8488/spanbert-finetuned-squadv1 | -| salti/bert-base-multilingual-cased-finetuned-squad | +| Model Identifier | +|:------------------------------------------------:| +| mrm8488/spanbert-finetuned-squadv1 | +|salti/bert-base-multilingual-cased-finetuned-squad| +| distilbert-base-uncased-distilled-squad | +|bert-large-uncased-whole-word-masking-finetuned-squad| +| deepset/roberta-large-squad2 | ```bash @@ -32,7 +35,7 @@ Download SQuAD dataset from [SQuAD dataset link](https://rajpurkar.github.io/SQu ## 1. 
Quantization -Quantize model with dynamic quantization: +Dynamic quantization: ```bash bash run_tuning.sh --input_model=/path/to/model \ # model path as *.onnx diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/export.py b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/export.py index a708a045c30..7e6ef28bfba 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/export.py +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/export.py @@ -6,24 +6,41 @@ def export_onnx_model(args, model): with torch.no_grad(): symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} - inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), - 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64), - 'token_type_ids': torch.ones(1, args.max_len, dtype=torch.int64)} - torch.onnx.export(model, # model being run - (inputs['input_ids'], # model input (or a tuple for multiple inputs) - inputs['attention_mask'], - inputs['token_type_ids']), - args.output_model, # where to save the model (can be a file or file-like object) - opset_version=11, # the ONNX version to export the model - do_constant_folding=True, # whether to execute constant folding - input_names=['input_ids', # the model's input names - 'attention_mask', - 'token_type_ids'], - output_names=['start_logits', - 'end_logits'], - dynamic_axes={'input_ids': symbolic_names, # variable length axes - 'attention_mask' : symbolic_names, - 'token_type_ids' : symbolic_names}) + if args.model_name_or_path in ['distilbert-base-uncased-distilled-squad', + 'deepset/roberta-large-squad2']: + inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64)} + torch.onnx.export(model, # model being run + (inputs['input_ids'], # model input (or a tuple for multiple inputs) + inputs['attention_mask']), + args.output_model, # where to save the model (can be a file or file-like object) + opset_version=14, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'attention_mask'], + output_names=['start_logits', + 'end_logits'], + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'attention_mask' : symbolic_names}) + else: + inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64), + 'token_type_ids': torch.ones(1, args.max_len, dtype=torch.int64)} + torch.onnx.export(model, # model being run + (inputs['input_ids'], # model input (or a tuple for multiple inputs) + inputs['attention_mask'], + inputs['token_type_ids']), + args.output_model, # where to save the model (can be a file or file-like object) + opset_version=14, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'attention_mask', + 'token_type_ids'], + output_names=['start_logits', + 'end_logits'], + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'attention_mask' : symbolic_names, + 'token_type_ids' : symbolic_names}) print("ONNX Model exported to {0}".format(args.output_model)) @@ -35,7 +52,10 @@ def export_onnx_model(args, model): '--model_name_or_path', type=str, choices=['mrm8488/spanbert-finetuned-squadv1', - 
'salti/bert-base-multilingual-cased-finetuned-squad'], + 'salti/bert-base-multilingual-cased-finetuned-squad', + 'distilbert-base-uncased-distilled-squad', + 'bert-large-uncased-whole-word-masking-finetuned-squad', + 'deepset/roberta-large-squad2'], help='pretrained model name or path ') parser.add_argument( '--max_len', diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/main.py index e68963fc93c..b1a9dd5e3e2 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/main.py +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/main.py @@ -34,10 +34,9 @@ from transformers import AutoTokenizer, EvalPrediction, HfArgumentParser, PreTrainedTokenizer, TrainingArguments from transformers.utils import check_min_version from transformers.utils.versions import require_version -from onnxruntime import InferenceSession +import onnxruntime from evaluate import load -# from optimum.onnxruntime.model import ORTModel from utils_model import ORTModel from utils_qa import postprocess_qa_predictions @@ -53,8 +52,6 @@ logger = logging.getLogger(__name__) -FP32_CONFIG = {'activation': {'dtype': ['fp32']}, 'weight': {'dtype': ['fp32']}} - @dataclass class ModelArguments: """ @@ -236,7 +233,8 @@ def __init__( ): self.dataset = dataset self.label_names = ["labels"] if label_names is None else label_names - self.session = InferenceSession(model.SerializeToString()) + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} self._process_dataset() @@ -446,7 +444,6 @@ def eval_func(model, *args): ort_model = ORTModel( model, - execution_provider='CPUExecutionProvider', compute_metrics=compute_metrics, label_names=["start_positions", "end_positions"], ) @@ -456,26 +453,28 @@ def eval_func(model, *args): return metrics['f1'] if model_args.tune: - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.onnx_model_bert import BertOptimizationOptions - opt_options = BertOptimizationOptions('bert') - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - model_args.input_model, - 'bert', - num_heads=model_args.num_heads, - hidden_size=model_args.hidden_size, - optimization_options=opt_options) - model = model_optimizer.model + if onnxruntime.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions('bert') + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + model_args.input_model, + 'bert', + num_heads=model_args.num_heads, + hidden_size=model_args.hidden_size, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(model_args.input_model) from neural_compressor import quantization, PostTrainingQuantConfig calib_dataset = SQuADDataset(eval_dataset, model, label_names=["start_positions", "end_positions"]) config = PostTrainingQuantConfig(approach='dynamic') q_model = quantization.fit(model, config, - eval_func=eval_func - ) + eval_func=eval_func) q_model.save(model_args.save_path) if model_args.benchmark: diff --git 
a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/requirements.txt b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/requirements.txt index 177f24bcf91..eea2a741d3c 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/requirements.txt +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/requirements.txt @@ -1,9 +1,10 @@ onnx onnxruntime onnxruntime-extensions; python_version < '3.10' -transformers==4.21.0 +transformers tensorboard numpy==1.23.5 datasets >= 1.8.0 torch >= 1.9.0 -evaluate \ No newline at end of file +evaluate +tqdm \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_benchmark.sh index c4dd80b3629..087eaa3a022 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_benchmark.sh +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_benchmark.sh @@ -32,8 +32,15 @@ function run_benchmark { if [[ "${input_model}" =~ "spanbert" ]]; then model_name_or_path="mrm8488/spanbert-finetuned-squadv1" - elif [[ "${input_model}" =~ "bert-base" ]]; then + elif [[ "${input_model}" =~ "bert-base-multilingual" ]]; then model_name_or_path="salti/bert-base-multilingual-cased-finetuned-squad" + elif [[ "${input_model}" =~ "distilbert-base-uncased" ]]; then + model_name_or_path="distilbert-base-uncased-distilled-squad" + elif [[ "${input_model}" =~ "bert-large-uncased" ]]; then + model_name_or_path="bert-large-uncased-whole-word-masking-finetuned-squad" + elif [[ "${input_model}" =~ "roberta-large" ]]; then + model_name_or_path="deepset/roberta-large-squad2" + extra_cmd='--version_2_with_negative=True' fi python main.py \ @@ -44,7 +51,8 @@ function run_benchmark { --overwrite_output_dir \ --dataset_name=squad \ --batch_size=${batch_size} \ - --benchmark + --benchmark \ + ${extra_cmd} } diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_tuning.sh b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_tuning.sh index d427bcd1d04..d2d6bdfa6e7 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_tuning.sh +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_tuning.sh @@ -29,10 +29,23 @@ function run_tuning { model_name_or_path="mrm8488/spanbert-finetuned-squadv1" num_heads=12 hidden_size=768 - elif [[ "${input_model}" =~ "bert-base" ]]; then + elif [[ "${input_model}" =~ "bert-base-multilingual" ]]; then model_name_or_path="salti/bert-base-multilingual-cased-finetuned-squad" num_heads=12 hidden_size=768 + elif [[ "${input_model}" =~ "distilbert-base-uncased" ]]; then + model_name_or_path="distilbert-base-uncased-distilled-squad" + num_heads=12 + hidden_size=768 + elif [[ "${input_model}" =~ "bert-large-uncased" ]]; then + model_name_or_path="bert-large-uncased-whole-word-masking-finetuned-squad" + num_heads=16 + hidden_size=1024 + elif [[ "${input_model}" =~ "roberta-large" ]]; then + model_name_or_path="deepset/roberta-large-squad2" + num_heads=16 + hidden_size=1024 + extra_cmd='--version_2_with_negative=True' fi python main.py \ @@ -44,7 +57,8 @@ function run_tuning { --model_name_or_path=${model_name_or_path} \ --num_heads ${num_heads} \ 
--hidden_size ${hidden_size} \ - --tune + --tune \ + ${extra_cmd} } main "$@" \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_model.py b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_model.py index 2d1b0db6c21..e77d4558526 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_model.py +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_model.py @@ -21,7 +21,7 @@ from transformers import EvalPrediction from transformers.trainer_pt_utils import nested_concat from transformers.trainer_utils import EvalLoopOutput -from onnxruntime import InferenceSession +import onnxruntime logger = logging.getLogger(__name__) @@ -29,7 +29,6 @@ class ORTModel: def __init__( self, model, - execution_provider: Optional[str] = "CPUExecutionProvider", compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, label_names: Optional[List[str]] = None, ): @@ -37,8 +36,6 @@ def __init__( Args: model: onnx.onnx_ml_pb2.ModelProto. - execution_provider (:obj:`str`, `optional`): - ONNX Runtime execution provider to use. compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and return a dictionary string to metric values. @@ -47,7 +44,8 @@ def __init__( """ self.compute_metrics = compute_metrics self.label_names = ["labels"] if label_names is None else label_names - self.session = InferenceSession(model.SerializeToString(), providers=[execution_provider]) + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} def evaluation_loop(self, dataset: Dataset): diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/README.md b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/README.md index 8b82776c42d..c89f3a40acc 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/README.md +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/README.md @@ -17,8 +17,9 @@ Supported model identifier from [huggingface.co](https://huggingface.co/): | Model Identifier | |:-----------------------------------------------:| -| mrm8488/spanbert-finetuned-squadv1 | -| salti/bert-base-multilingual-cased-finetuned-squad | +| mrm8488/spanbert-finetuned-squadv1 | +|salti/bert-base-multilingual-cased-finetuned-squad | +|bert-large-uncased-whole-word-masking-finetuned-squad| ```bash @@ -32,11 +33,12 @@ Download SQuAD dataset from [SQuAD dataset link](https://rajpurkar.github.io/SQu ## 1. 
Quantization -Quantize model with static quantization: +Static quantization with QOperator format: ```bash bash run_tuning.sh --input_model=/path/to/model \ # model path as *.onnx - --output_model=/path/to/model_tune + --output_model=/path/to/model_tune \ + --quant_format="QOperator" ``` diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/export.py b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/export.py index a708a045c30..ea5b3180d0d 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/export.py +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/export.py @@ -35,7 +35,8 @@ def export_onnx_model(args, model): '--model_name_or_path', type=str, choices=['mrm8488/spanbert-finetuned-squadv1', - 'salti/bert-base-multilingual-cased-finetuned-squad'], + 'salti/bert-base-multilingual-cased-finetuned-squad', + 'bert-large-uncased-whole-word-masking-finetuned-squad'], help='pretrained model name or path ') parser.add_argument( '--max_len', diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/main.py index 0359332db79..4239bc866d7 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/main.py +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/main.py @@ -34,10 +34,9 @@ from transformers import AutoTokenizer, EvalPrediction, HfArgumentParser, PreTrainedTokenizer, TrainingArguments from transformers.utils import check_min_version from transformers.utils.versions import require_version -from onnxruntime import InferenceSession +import onnxruntime from evaluate import load -# from optimum.onnxruntime.model import ORTModel from utils_model import ORTModel from utils_qa import postprocess_qa_predictions @@ -53,8 +52,6 @@ logger = logging.getLogger(__name__) -FP32_CONFIG = {'activation': {'dtype': ['fp32']}, 'weight': {'dtype': ['fp32']}} - @dataclass class ModelArguments: """ @@ -115,6 +112,10 @@ class ModelArguments: default=1, metadata={"help": ("batch size for benchmark")}, ) + quant_format: str = field( + default="QOperator", + metadata={"help": ("quant format")}, + ) @dataclass @@ -236,7 +237,8 @@ def __init__( ): self.dataset = dataset self.label_names = ["labels"] if label_names is None else label_names - self.session = InferenceSession(model.SerializeToString()) + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} self._process_dataset() @@ -446,7 +448,6 @@ def eval_func(model, *args): ort_model = ORTModel( model, - execution_provider='CPUExecutionProvider', compute_metrics=compute_metrics, label_names=["start_positions", "end_positions"], ) @@ -456,20 +457,24 @@ def eval_func(model, *args): return metrics['f1'] if model_args.tune: - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.onnx_model_bert import BertOptimizationOptions - opt_options = BertOptimizationOptions('bert') - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - model_args.input_model, - 'bert', - num_heads=model_args.num_heads, - hidden_size=model_args.hidden_size, - optimization_options=opt_options) - model = model_optimizer.model + 
if onnxruntime.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions('bert') + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + model_args.input_model, + 'bert', + num_heads=model_args.num_heads, + hidden_size=model_args.hidden_size, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(model_args.input_model) from neural_compressor import quantization, PostTrainingQuantConfig + from neural_compressor.utils.constant import FP32 calib_dataset = SQuADDataset(eval_dataset, model, label_names=["start_positions", "end_positions"]) fp32_op_names = None if model_args.model_name_or_path == 'mrm8488/spanbert-finetuned-squadv1': @@ -477,7 +482,9 @@ def eval_func(model, *args): elif model_args.model_name_or_path == 'salti/bert-base-multilingual-cased-finetuned-squad': fp32_op_names = ['MatMul_660', 'MatMul_566', 'Unsqueeze_91'] config = PostTrainingQuantConfig(approach='static', - op_name_dict={op_name:FP32_CONFIG for op_name in fp32_op_names if fp32_op_names}) + quant_format=model_args.quant_format, + op_name_dict={op_name:FP32 for op_name in fp32_op_names} \ + if fp32_op_names is not None else None) q_model = quantization.fit(model, config, eval_func=eval_func, diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/requirements.txt index 177f24bcf91..eea2a741d3c 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/requirements.txt +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/requirements.txt @@ -1,9 +1,10 @@ onnx onnxruntime onnxruntime-extensions; python_version < '3.10' -transformers==4.21.0 +transformers tensorboard numpy==1.23.5 datasets >= 1.8.0 torch >= 1.9.0 -evaluate \ No newline at end of file +evaluate +tqdm \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_benchmark.sh index c4dd80b3629..a4f21521e29 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_benchmark.sh +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_benchmark.sh @@ -32,8 +32,10 @@ function run_benchmark { if [[ "${input_model}" =~ "spanbert" ]]; then model_name_or_path="mrm8488/spanbert-finetuned-squadv1" - elif [[ "${input_model}" =~ "bert-base" ]]; then + elif [[ "${input_model}" =~ "bert-base-multilingual" ]]; then model_name_or_path="salti/bert-base-multilingual-cased-finetuned-squad" + elif [[ "${input_model}" =~ "bert-large-uncased" ]]; then + model_name_or_path="bert-large-uncased-whole-word-masking-finetuned-squad" fi python main.py \ diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_tuning.sh index d427bcd1d04..b2da8396706 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_tuning.sh +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_tuning.sh @@ -17,6 +17,9 @@ function init_params { --output_model=*) 
output_model=$(echo $var |cut -f2 -d=) ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; esac done @@ -29,10 +32,14 @@ function run_tuning { model_name_or_path="mrm8488/spanbert-finetuned-squadv1" num_heads=12 hidden_size=768 - elif [[ "${input_model}" =~ "bert-base" ]]; then + elif [[ "${input_model}" =~ "bert-base-multilingual" ]]; then model_name_or_path="salti/bert-base-multilingual-cased-finetuned-squad" num_heads=12 hidden_size=768 + elif [[ "${input_model}" =~ "bert-large-uncased" ]]; then + model_name_or_path="bert-large-uncased-whole-word-masking-finetuned-squad" + num_heads=16 + hidden_size=1024 fi python main.py \ @@ -41,7 +48,8 @@ function run_tuning { --save_path ${output_model} \ --output_dir './output' \ --overwrite_output_dir \ - --model_name_or_path=${model_name_or_path} \ + --model_name_or_path ${model_name_or_path} \ + --quant_format ${quant_format} \ --num_heads ${num_heads} \ --hidden_size ${hidden_size} \ --tune diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_model.py b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_model.py index 2d1b0db6c21..5dc1c601862 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_model.py +++ b/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_model.py @@ -21,7 +21,7 @@ from transformers import EvalPrediction from transformers.trainer_pt_utils import nested_concat from transformers.trainer_utils import EvalLoopOutput -from onnxruntime import InferenceSession +import onnxruntime logger = logging.getLogger(__name__) @@ -29,7 +29,6 @@ class ORTModel: def __init__( self, model, - execution_provider: Optional[str] = "CPUExecutionProvider", compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, label_names: Optional[List[str]] = None, ): @@ -37,8 +36,6 @@ def __init__( Args: model: onnx.onnx_ml_pb2.ModelProto. - execution_provider (:obj:`str`, `optional`): - ONNX Runtime execution provider to use. compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and return a dictionary string to metric values. 
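The hunk below completes the switch to provider-agnostic sessions; together with the changes above, every example in this patch funnels through the same 2.x entry points. As a rough sketch of that shared flow (the model path and the constant-returning `eval_func` are placeholders, and the static examples additionally pass `quant_format` plus a calibration dataloader built from the task dataset, as shown in their `main.py`):

```python
import onnx
import onnxruntime
from neural_compressor import quantization, PostTrainingQuantConfig

MODEL_PATH = "model.onnx"  # placeholder; produced by the example's export.py
model = onnx.load(MODEL_PATH)

def eval_func(model):
    # Placeholder metric: the real examples compute F1 / GLUE scores through a
    # session created exactly like this one (all available execution providers).
    session = onnxruntime.InferenceSession(model.SerializeToString(),
                                           providers=onnxruntime.get_available_providers())
    return 1.0

# Dynamic PTQ needs no calibration data; the static examples add
# quant_format="QOperator" (or "QDQ") and a calib_dataloader argument to quantization.fit().
config = PostTrainingQuantConfig(approach="dynamic")
q_model = quantization.fit(model, config, eval_func=eval_func)
q_model.save("model_quant.onnx")
```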
@@ -47,7 +44,8 @@ def __init__( """ self.compute_metrics = compute_metrics self.label_names = ["labels"] if label_names is None else label_names - self.session = InferenceSession(model.SerializeToString(), providers=[execution_provider]) + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} def evaluation_loop(self, dataset: Dataset): diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md index 8fbb4172966..45089397c4c 100644 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md +++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/README.md @@ -26,6 +26,11 @@ Supported model identifier from [huggingface.co](https://huggingface.co/): | Alireza1044/albert-base-v2-sst2 | | Intel/MiniLM-L12-H384-uncased-mrpc | | philschmid/MiniLM-L6-H384-uncased-sst2 | +| bert-base-cased-finetuned-mrpc | +| Intel/electra-small-discriminator-mrpc | +| M-FAC/bert-mini-finetuned-mrpc | +| Intel/xlnet-base-cased-mrpc | +| Intel/bart-large-mrpc | ```bash python export.py --model_name_or_path=Intel/bert-base-uncased-mrpc \ # or other supported model identifier @@ -45,7 +50,7 @@ bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME ## 1. Quantization -Quantize model with dynamic quantization: +Dynamic quantization: ```bash bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/export.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/export.py index 589fe3a345e..3c9490293a3 100644 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/export.py +++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/export.py @@ -9,7 +9,8 @@ def export_onnx_model(args, model): if args.model_name_or_path in ['Intel/roberta-base-mrpc', 'Intel/xlm-roberta-base-mrpc', 'Intel/camembert-base-mrpc', - 'distilbert-base-uncased-finetuned-sst-2-english']: + 'distilbert-base-uncased-finetuned-sst-2-english', + 'Intel/xlnet-base-cased-mrpc']: inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64)} torch.onnx.export(model, # model being run @@ -57,7 +58,12 @@ def export_onnx_model(args, model): 'distilbert-base-uncased-finetuned-sst-2-english', 'Alireza1044/albert-base-v2-sst2', 'philschmid/MiniLM-L6-H384-uncased-sst2', - 'Intel/MiniLM-L12-H384-uncased-mrpc'], + 'Intel/MiniLM-L12-H384-uncased-mrpc', + 'bert-base-cased-finetuned-mrpc', + 'Intel/electra-small-discriminator-mrpc', + 'M-FAC/bert-mini-finetuned-mrpc', + 'Intel/xlnet-base-cased-mrpc', + 'Intel/bart-large-mrpc'], help='pretrained model name or path') parser.add_argument( '--max_len', @@ -71,4 +77,8 @@ def export_onnx_model(args, model): args.model_name_or_path, config=AutoConfig.from_pretrained(args.model_name_or_path)) - export_onnx_model(args, model) \ No newline at end of file + if args.model_name_or_path == 'Intel/bart-large-mrpc': + import os + os.system('python -m transformers.onnx --model=Intel/bart-large-mrpc --feature=sequence-classification bart-large-mrpc') + else: + 
export_onnx_model(args, model) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py index 0e7a0d31e0a..08aa6e1c236 100644 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py +++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/main.py @@ -329,7 +329,12 @@ def result(self): 'distilbert-base-uncased-finetuned-sst-2-english', 'Alireza1044/albert-base-v2-sst2', 'philschmid/MiniLM-L6-H384-uncased-sst2', - 'Intel/MiniLM-L12-H384-uncased-mrpc'], + 'Intel/MiniLM-L12-H384-uncased-mrpc', + 'bert-base-cased-finetuned-mrpc', + 'Intel/electra-small-discriminator-mrpc', + 'M-FAC/bert-mini-finetuned-mrpc', + 'Intel/xlnet-base-cased-mrpc', + 'Intel/bart-large-mrpc'], help="pretrained model name or path" ) parser.add_argument( @@ -361,8 +366,8 @@ def result(self): def eval_func(model, *args): metric.reset() - import tqdm - session = ort.InferenceSession(model.SerializeToString(), None) + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] @@ -392,18 +397,22 @@ def eval_func(model, *args): if args.tune: - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.onnx_model_bert import BertOptimizationOptions - opt_options = BertOptimizationOptions('bert') - opt_options.enable_embed_layer_norm = False + if ort.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + model_type = 'bart' if args.model_name_or_path == 'Intel/bart-large-mrpc' else 'bert' + opt_options = FusionOptions(model_type) + opt_options.enable_embed_layer_norm = False - model_optimizer = optimizer.optimize_model( - args.model_path, - 'bert', - num_heads=args.num_heads, - hidden_size=args.hidden_size, - optimization_options=opt_options) - model = model_optimizer.model + model_optimizer = optimizer.optimize_model( + args.model_path, + model_type, + num_heads=args.num_heads, + hidden_size=args.hidden_size, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(args.model_path) from neural_compressor import quantization, PostTrainingQuantConfig config = PostTrainingQuantConfig(approach='dynamic') diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt index 9467c562bb5..f17f7de8b06 100644 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt +++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/requirements.txt @@ -1,5 +1,5 @@ torch -transformers==4.16.0 +transformers onnx onnxruntime coloredlogs diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh index c2169fb5148..4c765724153 100644 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh +++ 
b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_benchmark.sh @@ -33,7 +33,7 @@ function init_params { # run_benchmark function run_benchmark { - if [[ "${input_model}" =~ "bert-base" ]]; then + if [[ "${input_model}" =~ "bert-base-uncased" ]]; then model_name_or_path="Intel/bert-base-uncased-mrpc" TASK_NAME='mrpc' fi @@ -65,6 +65,26 @@ function run_benchmark { model_name_or_path="Intel/MiniLM-L12-H384-uncased-mrpc" TASK_NAME='mrpc' fi + if [[ "${input_model}" =~ "bert-base-cased" ]]; then + model_name_or_path="bert-base-cased-finetuned-mrpc" + TASK_NAME='mrpc' + fi + if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then + model_name_or_path="Intel/xlnet-base-cased-mrpc" + TASK_NAME='mrpc' + fi + if [[ "${input_model}" =~ "bert-mini" ]]; then + model_name_or_path="M-FAC/bert-mini-finetuned-mrpc" + TASK_NAME='mrpc' + fi + if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then + model_name_or_path="Intel/electra-small-discriminator-mrpc" + TASK_NAME='mrpc' + fi + if [[ "${input_model}" =~ "bart" ]]; then + model_name_or_path="Intel/bart-large-mrpc" + TASK_NAME='mrpc' + fi python main.py \ --model_name_or_path ${model_name_or_path} \ diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_tuning.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_tuning.sh index 21214538e77..850d3790f4a 100644 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_tuning.sh +++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_dynamic/run_tuning.sh @@ -28,7 +28,7 @@ function init_params { # run_tuning function run_tuning { - if [[ "${input_model}" =~ "bert-base" ]]; then + if [[ "${input_model}" =~ "bert-base-uncased" ]]; then model_name_or_path="Intel/bert-base-uncased-mrpc" TASK_NAME='mrpc' num_heads=12 @@ -76,6 +76,36 @@ function run_tuning { num_heads=12 hidden_size=384 fi + if [[ "${input_model}" =~ "bert-base-cased" ]]; then + model_name_or_path="bert-base-cased-finetuned-mrpc" + TASK_NAME='mrpc' + num_heads=12 + hidden_size=384 + fi + if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then + model_name_or_path="Intel/xlnet-base-cased-mrpc" + TASK_NAME='mrpc' + num_heads=12 + hidden_size=768 + fi + if [[ "${input_model}" =~ "bert-mini" ]]; then + model_name_or_path="M-FAC/bert-mini-finetuned-mrpc" + TASK_NAME='mrpc' + num_heads=4 + hidden_size=256 + fi + if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then + model_name_or_path="Intel/electra-small-discriminator-mrpc" + TASK_NAME='mrpc' + num_heads=4 + hidden_size=256 + fi + if [[ "${input_model}" =~ "bart" ]]; then + model_name_or_path="Intel/bart-large-mrpc" + TASK_NAME='mrpc' + num_heads=16 + hidden_size=4096 + fi python main.py \ --model_name_or_path ${model_name_or_path} \ diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md index ec04e3f22a6..4a665c205f9 100644 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md +++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/README.md @@ -20,12 +20,13 @@ Supported model identifier from [huggingface.co](https://huggingface.co/): |:-----------------------------------------------:| | Intel/bert-base-uncased-mrpc | | Intel/roberta-base-mrpc | -| Intel/xlm-roberta-base-mrpc | -| 
Intel/camembert-base-mrpc | | distilbert-base-uncased-finetuned-sst-2-english | -| Alireza1044/albert-base-v2-sst2 | | Intel/MiniLM-L12-H384-uncased-mrpc | | philschmid/MiniLM-L6-H384-uncased-sst2 | +| bert-base-cased-finetuned-mrpc | +| Intel/electra-small-discriminator-mrpc | +| M-FAC/bert-mini-finetuned-mrpc | +| Intel/xlnet-base-cased-mrpc | ```bash python export.py --model_name_or_path=Intel/bert-base-uncased-mrpc \ # or other supported model identifier @@ -45,11 +46,13 @@ bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME ## 1. Quantization -Quantize model with static quantization: +Static quantization with QOperator format: ```bash bash run_tuning.sh --input_model=/path/to/model \ # model path as *.onnx - --output_model=/path/to/model_tune + --output_model=/path/to/model_tune \ + --dataset_location=path/to/glue/data \ + --quant_format="QOperator" ``` ## 2. Benchmark diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py index c05ac5a63fa..e065aa27eb2 100644 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py +++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/main.py @@ -324,12 +324,13 @@ def result(self): type=str, choices=['Intel/bert-base-uncased-mrpc', 'Intel/roberta-base-mrpc', - 'Intel/xlm-roberta-base-mrpc', - 'Intel/camembert-base-mrpc', 'distilbert-base-uncased-finetuned-sst-2-english', - 'Alireza1044/albert-base-v2-sst2', 'philschmid/MiniLM-L6-H384-uncased-sst2', - 'Intel/MiniLM-L12-H384-uncased-mrpc'], + 'Intel/MiniLM-L12-H384-uncased-mrpc', + 'bert-base-cased-finetuned-mrpc', + 'Intel/electra-small-discriminator-mrpc', + 'M-FAC/bert-mini-finetuned-mrpc', + 'Intel/xlnet-base-cased-mrpc'], help="pretrained model name or path" ) parser.add_argument( @@ -349,6 +350,13 @@ def result(self): default=768, type=int, ) + parser.add_argument( + '--quant_format', + type=str, + default='QOperator', + choices=['QOperator', 'QDQ'], + help="quantization format" + ) args = parser.parse_args() @@ -361,8 +369,8 @@ def result(self): def eval_func(model, *args): metric.reset() - import tqdm - session = ort.InferenceSession(model.SerializeToString(), None) + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] @@ -392,22 +400,25 @@ def eval_func(model, *args): if args.tune: - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.onnx_model_bert import BertOptimizationOptions - opt_options = BertOptimizationOptions('bert') - opt_options.enable_embed_layer_norm = False + if ort.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions('bert') + opt_options.enable_embed_layer_norm = False - model_optimizer = optimizer.optimize_model( - args.model_path, - 'bert', - num_heads=args.num_heads, - hidden_size=args.hidden_size, - optimization_options=opt_options) - model = model_optimizer.model + model_optimizer = optimizer.optimize_model( + args.model_path, + 'bert', + num_heads=args.num_heads, + hidden_size=args.hidden_size, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(args.model_path) from neural_compressor 
import quantization, PostTrainingQuantConfig config = PostTrainingQuantConfig(approach='static', - quant_level='auto') + quant_format=args.quant_format) q_model = quantization.fit(model, config, eval_func=eval_func, diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt index 9467c562bb5..f17f7de8b06 100644 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt +++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/requirements.txt @@ -1,5 +1,5 @@ torch -transformers==4.16.0 +transformers onnx onnxruntime coloredlogs diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh index c2169fb5148..ccae3635d31 100644 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh +++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_benchmark.sh @@ -33,7 +33,7 @@ function init_params { # run_benchmark function run_benchmark { - if [[ "${input_model}" =~ "bert-base" ]]; then + if [[ "${input_model}" =~ "bert-base-uncased" ]]; then model_name_or_path="Intel/bert-base-uncased-mrpc" TASK_NAME='mrpc' fi @@ -41,22 +41,10 @@ function run_benchmark { model_name_or_path="Intel/roberta-base-mrpc" TASK_NAME='mrpc' fi - if [[ "${input_model}" =~ "xlm-roberta-base" ]]; then - model_name_or_path="Intel/xlm-roberta-base-mrpc" - TASK_NAME='mrpc' - fi - if [[ "${input_model}" =~ "camembert-base" ]]; then - model_name_or_path="Intel/camembert-base-mrpc" - TASK_NAME='mrpc' - fi if [[ "${input_model}" =~ "distilbert-base" ]]; then model_name_or_path="distilbert-base-uncased-finetuned-sst-2-english" TASK_NAME='sst-2' fi - if [[ "${input_model}" =~ "albert-base" ]]; then - model_name_or_path="Alireza1044/albert-base-v2-sst2" - TASK_NAME='sst-2' - fi if [[ "${input_model}" =~ "MiniLM-L6" ]]; then model_name_or_path="philschmid/MiniLM-L6-H384-uncased-sst2" TASK_NAME='sst-2' @@ -65,6 +53,22 @@ function run_benchmark { model_name_or_path="Intel/MiniLM-L12-H384-uncased-mrpc" TASK_NAME='mrpc' fi + if [[ "${input_model}" =~ "bert-base-cased" ]]; then + model_name_or_path="bert-base-cased-finetuned-mrpc" + TASK_NAME='mrpc' + fi + if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then + model_name_or_path="Intel/xlnet-base-cased-mrpc" + TASK_NAME='mrpc' + fi + if [[ "${input_model}" =~ "bert-mini" ]]; then + model_name_or_path="M-FAC/bert-mini-finetuned-mrpc" + TASK_NAME='mrpc' + fi + if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then + model_name_or_path="Intel/electra-small-discriminator-mrpc" + TASK_NAME='mrpc' + fi python main.py \ --model_name_or_path ${model_name_or_path} \ diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_tuning.sh index 21214538e77..130d54e3ce3 100644 --- a/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_tuning.sh +++ b/examples/onnxrt/nlp/huggingface_model/text_classification/quantization/ptq_static/run_tuning.sh @@ -20,6 +20,9 @@ function init_params { --dataset_location=*) dataset_location=$(echo $var |cut -f2 -d=) ;; + --quant_format=*) 
+ quant_format=$(echo $var |cut -f2 -d=) + ;; esac done @@ -28,7 +31,7 @@ function init_params { # run_tuning function run_tuning { - if [[ "${input_model}" =~ "bert-base" ]]; then + if [[ "${input_model}" =~ "bert-base-uncased" ]]; then model_name_or_path="Intel/bert-base-uncased-mrpc" TASK_NAME='mrpc' num_heads=12 @@ -40,30 +43,12 @@ function run_tuning { num_heads=12 hidden_size=768 fi - if [[ "${input_model}" =~ "xlm-roberta-base" ]]; then - model_name_or_path="Intel/xlm-roberta-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi - if [[ "${input_model}" =~ "camembert-base" ]]; then - model_name_or_path="Intel/camembert-base-mrpc" - TASK_NAME='mrpc' - num_heads=12 - hidden_size=768 - fi if [[ "${input_model}" =~ "distilbert-base" ]]; then model_name_or_path="distilbert-base-uncased-finetuned-sst-2-english" TASK_NAME='sst-2' num_heads=12 hidden_size=768 fi - if [[ "${input_model}" =~ "albert-base" ]]; then - model_name_or_path="Alireza1044/albert-base-v2-sst2" - TASK_NAME='sst-2' - num_heads=12 - hidden_size=768 - fi if [[ "${input_model}" =~ "MiniLM-L6" ]]; then model_name_or_path="philschmid/MiniLM-L6-H384-uncased-sst2" TASK_NAME='sst-2' @@ -76,9 +61,34 @@ function run_tuning { num_heads=12 hidden_size=384 fi + if [[ "${input_model}" =~ "bert-base-cased" ]]; then + model_name_or_path="bert-base-cased-finetuned-mrpc" + TASK_NAME='mrpc' + num_heads=12 + hidden_size=384 + fi + if [[ "${input_model}" =~ "xlnet-base-cased" ]]; then + model_name_or_path="Intel/xlnet-base-cased-mrpc" + TASK_NAME='mrpc' + num_heads=12 + hidden_size=768 + fi + if [[ "${input_model}" =~ "bert-mini" ]]; then + model_name_or_path="M-FAC/bert-mini-finetuned-mrpc" + TASK_NAME='mrpc' + num_heads=4 + hidden_size=256 + fi + if [[ "${input_model}" =~ "electra-small-discriminator" ]]; then + model_name_or_path="Intel/electra-small-discriminator-mrpc" + TASK_NAME='mrpc' + num_heads=4 + hidden_size=256 + fi python main.py \ --model_name_or_path ${model_name_or_path} \ + --quant_format ${quant_format} \ --model_path ${input_model} \ --output_model ${output_model} \ --data_path ${dataset_location} \ diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/README.md new file mode 100644 index 00000000000..064651ffc33 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/README.md @@ -0,0 +1,82 @@ +Step-by-Step +============ + +This example load a MobileBERT model and confirm its accuracy and speed based on [GLUE data](https://gluebenchmark.com/). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Dataset + +download the GLUE data with `prepare_data.sh` script. +```shell +export GLUE_DIR=path/to/glue_data +export TASK_NAME=MRPC + +bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME +``` + +## 3. Prepare Model + +Please refer to [Bert-GLUE_OnnxRuntime_quantization guide](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/quantization/notebooks/Bert-GLUE_OnnxRuntime_quantization.ipynb) for detailed model export. The following is a simple example. 
+ +Use [Huggingface Transformers](https://github.com/huggingface/transformers/tree/v2.2.1) to fine-tune the model based on the [MRPC](https://github.com/huggingface/transformers/tree/master/examples/text-classification#mrpc) example with command like: +```shell +export OUT_DIR=/path/to/out_dir/ +python ./run_glue.py \ + --model_type mobilebert \ + --model_name_or_path google/mobilebert-uncased \ + --task_name $TASK_NAME \ + --do_train \ + --do_eval \ + --do_lower_case \ + --data_dir $GLUE_DIR/$TASK_NAME \ + --max_seq_length 128 \ + --per_gpu_eval_batch_size=8 \ + --per_gpu_train_batch_size=8 \ + --learning_rate 2e-5 \ + --num_train_epochs 5.0 \ + --save_steps 100000 \ + --output_dir $OUT_DIR +``` +Run the `prepare_model.sh` script. + +Usage: +```shell +cd examples/onnxrt/language_translation/mobilebert/ + +bash prepare_model.sh --input_dir=$OUT_DIR \ + --task_name=$TASK_NAME \ + --output_model=path/to/model # model path as *.onnx +``` + +# Run + +## 1. Quantization + +Dynamic quantization: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/model_tune \ # model path as *.onnx + --dataset_location=path/to/glue_data +``` + +## 2. Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/glue_data \ + --batch_size=batch_size \ + --mode=performance # or accuracy +``` + + + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/export.py b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/export.py new file mode 100644 index 00000000000..9cfb64c69a9 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/export.py @@ -0,0 +1,61 @@ +import argparse + +import torch +from transformers import MobileBertForSequenceClassification + +def export_onnx_model(args, model, onnx_model_path): + with torch.no_grad(): + inputs = {'input_ids': torch.ones(1,args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1,args.max_len, dtype=torch.int64), + 'token_type_ids': torch.ones(1,args.max_len, dtype=torch.int64)} + outputs = model(**inputs) + + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + torch.onnx.export(model, # model being run + (inputs['input_ids'], + inputs['attention_mask']), # model input (or a tuple for + # multiple inputs) + onnx_model_path, # where to save the model (can be a file + # or file-like object) + opset_version=11, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'input_mask'], + output_names=['output'], # the model's output names + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'input_mask' : symbolic_names}) + print("ONNX Model exported to {0}".format(onnx_model_path)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export bert onnx model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--input_dir', + type=str, + help='input_dir of bert model, must contain config.json') + parser.add_argument( + '--task_name', + type=str, + choices=["MRPC", "MNLI"], + help='tasks names of bert model') + parser.add_argument( + '--max_len', + type=int, + default=128, + help='Maximum length of the sentence pairs') + parser.add_argument( + '--do_lower_case', + type=bool, + default=True, + help='whether lower the tokenizer') + parser.add_argument( + '--output_model', + type=str, + default='bert.onnx', + help='path to exported 
model file') + args = parser.parse_args() + + model = MobileBertForSequenceClassification.from_pretrained(args.input_dir) + export_onnx_model(args, model, args.output_model) diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/main.py new file mode 100644 index 00000000000..7e07cb23457 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/main.py @@ -0,0 +1,412 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint:disable=redefined-outer-name,logging-format-interpolation + +import logging +import argparse +import os +import onnx +import onnxruntime +import transformers +import torch +import numpy as np +from dataclasses import dataclass +from typing import List, Optional, Union +from neural_compressor.data.dataloaders.onnxrt_dataloader import DefaultDataLoader + +logger = logging.getLogger(__name__) +logging.basicConfig(format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt = "%m/%d/%Y %H:%M:%S", + level = logging.WARN) + +class ONNXRTBertDataset: + """Dataset used for model Bert. + Args: data_dir (str): The input data dir. + model_name_or_path (str): Path to pre-trained student model or shortcut name, + selected in the list: + max_seq_length (int, default=128): The maximum length after tokenization. + Sequences longer than this will be truncated, + sequences shorter will be padded. + do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. + task (str, default=mrpc): The name of the task to fine-tune. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. + model_type (str, default="bert"): model type, support "distilbert", "bert", + "mobilebert", "roberta". + dynamic_length (bool, default=False): Whether to use fixed sequence length. + evaluate (bool, default=True): Whether do evaluation or training. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
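+
+    Example (arguments shown are illustrative; main() fills them in from the
+    command-line flags)::
+
+        dataset = ONNXRTBertDataset(model="mobilebert.onnx",
+                                    data_dir="/path/to/glue_data/MRPC",
+                                    model_name_or_path="google/mobilebert-uncased",
+                                    task="mrpc",
+                                    model_type="mobilebert")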
+ """ + def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ + do_lower_case=True, task="mrpc", model_type="bert", dynamic_length=False,\ + evaluate=True, transform=None, filter=None): + self.inputs = [inp.name for inp in onnx.load(model).graph.input] + task = task.lower() + model_type = model_type.lower() + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + assert model_type in ["distilbert", "bert", "mobilebert", "roberta"], "Unsupported \ + model type" + self.dynamic_length = dynamic_length + self.model_type = model_type + self.max_seq_length = max_seq_length + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, + do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ + max_seq_length, task, model_type, tokenizer, evaluate) + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) + return batch[:len(self.inputs)], batch[-1] + +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ + model_type, tokenizer, evaluate): + from torch.utils.data import TensorDataset + + processor = transformers.glue_processors[task]() + output_mode = transformers.glue_output_modes[task] + # Load data features from cache or dataset file + if not os.path.exists("./dataset_cached"): + os.makedirs("./dataset_cached") + cached_features_file = os.path.join("./dataset_cached", "cached_{}_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, model_name_or_path.split("/"))).pop(), + str(max_seq_length), + str(task))) + if os.path.exists(cached_features_file): + logger.info("Load features from cached file {}.".format(cached_features_file)) + features = torch.load(cached_features_file) + else: + logger.info("Create features from dataset file at {}.".format(data_dir)) + label_list = processor.get_labels() + examples = processor.get_dev_examples(data_dir) if evaluate else \ + processor.get_train_examples(data_dir) + features = convert_examples_to_features(examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, + ) + logger.info("Save features into cached file {}.".format(cached_features_file)) + torch.save(features, cached_features_file) + # Convert to Tensors and build dataset + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) + all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) + all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) + if output_mode == "classification": + all_labels = torch.tensor([f.label for f in features], dtype=torch.long) + elif output_mode == "regression": + all_labels = torch.tensor([f.label for f in features], dtype=torch.float) + dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ + all_seq_lengths, all_labels) + return dataset + +def convert_examples_to_features( + examples, + tokenizer, + max_length=128, + task=None, + label_list=None, + output_mode="classification", + pad_token=0, + pad_token_segment_id=0, + mask_padding_with_zero=True, +): + processor = transformers.glue_processors[task]() + if label_list is None: + label_list = processor.get_labels() + logger.info("Use label 
list {} for task {}.".format(label_list, task)) + label_map = {label: i for i, label in enumerate(label_list)} + features = [] + for (ex_index, example) in enumerate(examples): + inputs = tokenizer.encode_plus( + example.text_a, + example.text_b, + add_special_tokens=True, + max_length=max_length, + return_token_type_ids=True, + truncation=True, + ) + input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) + + # Zero-pad up to the sequence length. + seq_length = len(input_ids) + padding_length = max_length - len(input_ids) + + input_ids = input_ids + ([pad_token] * padding_length) + attention_mask = attention_mask + \ + ([0 if mask_padding_with_zero else 1] * padding_length) + token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) + + assert len(input_ids) == max_length, \ + "Error with input_ids length {} vs {}".format( + len(input_ids), max_length) + assert len(attention_mask) == max_length, \ + "Error with attention_mask length {} vs {}".format( + len(attention_mask), max_length + ) + assert len(token_type_ids) == max_length, \ + "Error with token_type_ids length {} vs {}".format( + len(token_type_ids), max_length + ) + if output_mode == "classification": + label = label_map[example.label] + elif output_mode == "regression": + label = float(example.label) + else: + raise KeyError(output_mode) + + feats = InputFeatures( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + label=label, + seq_length=seq_length, + ) + features.append(feats) + return features + +@dataclass(frozen=True) +class InputFeatures: + """ + A single set of features of data. + Property names are the same names as the corresponding inputs to a model. + Args: + input_ids: Indices of input sequence tokens in the vocabulary. + attention_mask: Mask to avoid performing attention on padding token indices. + Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, + ``0`` for MASKED (padded) tokens. + token_type_ids: (Optional) Segment token indices to indicate first and second + portions of the inputs. Only some models use them. + label: (Optional) Label corresponding to the input. Int for classification problems, + float for regression problems. + seq_length: (Optional) The length of input sequence before padding. + """ + + input_ids: List[int] + attention_mask: Optional[List[int]] = None + token_type_ids: Optional[List[int]] = None + label: Optional[Union[int, float]] = None + seq_length: Optional[List[int]] = None + +class ONNXRTGLUE: + """Computes GLUE score. + + Args: + task (str, default=mrpc): The name of the task. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. 
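+
+    Example (illustrative values)::
+
+        import numpy as np
+        metric = ONNXRTGLUE(task="mrpc")
+        metric.update(np.array([[0.1, 0.9]]), np.array([1]))  # logits and label for one sample
+        acc = metric.result()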
+ + """ + def __init__(self, task="mrpc"): + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + self.pred_list = None + self.label_list = None + self.task = task + self.return_key = { + "cola": "mcc", + "mrpc": "acc", + "sts-b": "corr", + "qqp": "acc", + "mnli": "mnli/acc", + "qnli": "acc", + "rte": "acc", + "wnli": "acc", + "sst-2": "acc" + } + + def update(self, preds, labels): + """add preds and labels to storage""" + if isinstance(preds, list) and len(preds) == 1: + preds = preds[0] + if isinstance(labels, list) and len(labels) == 1: + labels = labels[0] + if self.pred_list is None: + self.pred_list = preds + self.label_list = labels + else: + self.pred_list = np.append(self.pred_list, preds, axis=0) + self.label_list = np.append(self.label_list, labels, axis=0) + + def reset(self): + """clear preds and labels storage""" + self.pred_list = None + self.label_list = None + + def result(self): + """calculate metric""" + output_mode = transformers.glue_output_modes[self.task] + + if output_mode == "classification": + processed_preds = np.argmax(self.pred_list, axis=1) + elif output_mode == "regression": + processed_preds = np.squeeze(self.pred_list) + result = transformers.glue_compute_metrics(\ + self.task, processed_preds, self.label_list) + return result[self.return_key[self.task]] + +if __name__ == "__main__": + logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") + parser = argparse.ArgumentParser( + description="BERT fine-tune examples for classification/regression tasks.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--model_path", + type=str, + help="Pre-trained model on onnx file" + ) + parser.add_argument( + "--benchmark", + action="store_true", \ + default=False + ) + parser.add_argument( + "--tune", + action="store_true", \ + default=False, + help="whether quantize the model" + ) + parser.add_argument( + "--output_model", + type=str, + help="output model path" + ) + parser.add_argument( + "--mode", + type=str, + help="benchmark mode of performance or accuracy" + ) + parser.add_argument( + "--model_name_or_path", + type=str, + help="pretrained model name or path" + ) + parser.add_argument( + "--data_path", + type=str, + help="input data path" + ) + parser.add_argument( + "--batch_size", + default=8, + type=int, + ) + parser.add_argument( + "--task", + type=str, + default="mrpc", + choices=["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], + help="GLUE task name" + ) + parser.add_argument( + "--dynamic_length", + type=bool, + default=False, + help="dynamic length" + ) + parser.add_argument( + "--max_seq_length", + type=int, + default=128, + help="max sequence length" + ) + parser.add_argument( + "--model_type", + type=str, + default="bert", + choices=["distilbert", "bert", "mobilebert", "roberta"], + help="model type" + ) + args = parser.parse_args() + + dataset = ONNXRTBertDataset(args.model_path, + data_dir=args.data_path, + model_name_or_path=args.model_name_or_path, + max_seq_length=args.max_seq_length, + task=args.task, + model_type=args.model_type, + dynamic_length=args.dynamic_length) + dataloader = DefaultDataLoader(dataset, args.batch_size) + metric = ONNXRTGLUE(args.task) + + def eval_func(model): + metric.reset() + session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = 
[session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + inputs = inputs[:len_inputs] + for i in range(len_inputs): + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + metric.update(predictions[0], labels) + return metric.result() + + if args.benchmark: + model = onnx.load(args.model_path) + if args.mode == "performance": + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == "accuracy": + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + if onnxruntime.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions("bert") + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + args.model_path, + "bert", + num_heads=4, + hidden_size=512, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(args.model_path) + + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach="dynamic") + q_model = quantization.fit(model, + config, + eval_func=eval_func) + q_model.save(args.output_model) + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_data.sh b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_data.sh new file mode 100644 index 00000000000..8e434a5c521 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + download_data + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --data_dir=*) + data_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function download_data { + wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py + python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_model.sh b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_model.sh new file mode 100644 index 00000000000..8d6eb064930 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_model.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + export_model + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_dir=*) + input_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + --max_len=*) + max_len=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function export_model { + python export.py --input_dir ${input_dir} --task_name ${task_name} --output_model ${output_model} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/requirements.txt b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/requirements.txt new file mode 100644 
index 00000000000..1fb753da72e --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/requirements.txt @@ -0,0 +1,7 @@ +torch +transformers +onnx +onnxruntime +coloredlogs +sympy +onnxruntime-extensions; python_version < '3.10' diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_benchmark.sh b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_benchmark.sh new file mode 100644 index 00000000000..cc58c5c322b --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_benchmark.sh @@ -0,0 +1,62 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [[ ${mode} == "accuracy" ]]; then + dynamic_length=False + elif [[ ${mode} == "performance" ]]; then + dynamic_length=True + else + echo "Error: No such mode: ${mode}" + exit 1 + fi + + model_name_or_path="google/mobilebert-uncased" + task_name="mrpc" + model_type="mobilebert" + + python main.py \ + --model_path ${input_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --model_type ${model_type} \ + --mode ${mode} \ + --dynamic_length ${dynamic_length} \ + --benchmark + +} + +main "$@" + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_tuning.sh b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_tuning.sh new file mode 100644 index 00000000000..9e7a992f2a2 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_tuning.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + model_name_or_path="google/mobilebert-uncased" + batch_size=8 + task_name="mrpc" + model_type="mobilebert" + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --model_type ${model_type} \ + --tune +} + +main "$@" + + + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/README.md b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/README.md new file mode 100644 index 00000000000..7f6dccf5ef7 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/README.md @@ -0,0 +1,83 @@ +Step-by-Step +============ + +This example load a MobileBERT model and confirm its accuracy and speed based on [GLUE data](https://gluebenchmark.com/). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Dataset + +download the GLUE data with `prepare_data.sh` script. 
+```shell +export GLUE_DIR=path/to/glue_data +export TASK_NAME=MRPC + +bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME +``` + +## 3. Prepare Model + +Please refer to [Bert-GLUE_OnnxRuntime_quantization guide](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/quantization/notebooks/Bert-GLUE_OnnxRuntime_quantization.ipynb) for detailed model export. The following is a simple example. + +Use [Huggingface Transformers](https://github.com/huggingface/transformers/tree/v2.2.1) to fine-tune the model based on the [MRPC](https://github.com/huggingface/transformers/tree/master/examples/text-classification#mrpc) example with command like: +```shell +export OUT_DIR=/path/to/out_dir/ +python ./run_glue.py \ + --model_type mobilebert \ + --model_name_or_path google/mobilebert-uncased \ + --task_name $TASK_NAME \ + --do_train \ + --do_eval \ + --do_lower_case \ + --data_dir $GLUE_DIR/$TASK_NAME \ + --max_seq_length 128 \ + --per_gpu_eval_batch_size=8 \ + --per_gpu_train_batch_size=8 \ + --learning_rate 2e-5 \ + --num_train_epochs 5.0 \ + --save_steps 100000 \ + --output_dir $OUT_DIR +``` +Run the `prepare_model.sh` script. + +Usage: +```shell +cd examples/onnxrt/language_translation/mobilebert/ + +bash prepare_model.sh --input_dir=$OUT_DIR \ + --task_name=$TASK_NAME \ + --output_model=path/to/model # model path as *.onnx +``` + +# Run + +## 1. Quantization + +Static quantization with QDQ format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/model_tune \ # model path as *.onnx + --dataset_location=path/to/glue_data \ + --quant_format="QDQ" +``` + +## 2. Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/glue_data \ + --batch_size=batch_size \ + --mode=performance # or accuracy +``` + + + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/export.py b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/export.py new file mode 100644 index 00000000000..9cfb64c69a9 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/export.py @@ -0,0 +1,61 @@ +import argparse + +import torch +from transformers import MobileBertForSequenceClassification + +def export_onnx_model(args, model, onnx_model_path): + with torch.no_grad(): + inputs = {'input_ids': torch.ones(1,args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1,args.max_len, dtype=torch.int64), + 'token_type_ids': torch.ones(1,args.max_len, dtype=torch.int64)} + outputs = model(**inputs) + + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + torch.onnx.export(model, # model being run + (inputs['input_ids'], + inputs['attention_mask']), # model input (or a tuple for + # multiple inputs) + onnx_model_path, # where to save the model (can be a file + # or file-like object) + opset_version=11, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'input_mask'], + output_names=['output'], # the model's output names + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'input_mask' : symbolic_names}) + print("ONNX Model exported to {0}".format(onnx_model_path)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export bert onnx model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--input_dir', + type=str, + help='input_dir of bert model, must 
contain config.json') + parser.add_argument( + '--task_name', + type=str, + choices=["MRPC", "MNLI"], + help='tasks names of bert model') + parser.add_argument( + '--max_len', + type=int, + default=128, + help='Maximum length of the sentence pairs') + parser.add_argument( + '--do_lower_case', + type=bool, + default=True, + help='whether lower the tokenizer') + parser.add_argument( + '--output_model', + type=str, + default='bert.onnx', + help='path to exported model file') + args = parser.parse_args() + + model = MobileBertForSequenceClassification.from_pretrained(args.input_dir) + export_onnx_model(args, model, args.output_model) diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py new file mode 100644 index 00000000000..9b5674a50d0 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py @@ -0,0 +1,422 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint:disable=redefined-outer-name,logging-format-interpolation + +import logging +import argparse +import os +import onnx +import onnxruntime +import transformers +import torch +import numpy as np +from dataclasses import dataclass +from typing import List, Optional, Union +from neural_compressor.data.dataloaders.onnxrt_dataloader import DefaultDataLoader + +logger = logging.getLogger(__name__) +logging.basicConfig(format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt = "%m/%d/%Y %H:%M:%S", + level = logging.WARN) + +class ONNXRTBertDataset: + """Dataset used for model Bert. + Args: data_dir (str): The input data dir. + model_name_or_path (str): Path to pre-trained student model or shortcut name, + selected in the list: + max_seq_length (int, default=128): The maximum length after tokenization. + Sequences longer than this will be truncated, + sequences shorter will be padded. + do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. + task (str, default=mrpc): The name of the task to fine-tune. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. + model_type (str, default="bert"): model type, support "distilbert", "bert", + "mobilebert", "roberta". + dynamic_length (bool, default=False): Whether to use fixed sequence length. + evaluate (bool, default=True): Whether do evaluation or training. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ + do_lower_case=True, task="mrpc", model_type="bert", dynamic_length=False,\ + evaluate=True, transform=None, filter=None): + self.inputs = [inp.name for inp in onnx.load(model).graph.input] + task = task.lower() + model_type = model_type.lower() + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + assert model_type in ["distilbert", "bert", "mobilebert", "roberta"], "Unsupported \ + model type" + self.dynamic_length = dynamic_length + self.model_type = model_type + self.max_seq_length = max_seq_length + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, + do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ + max_seq_length, task, model_type, tokenizer, evaluate) + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) + return batch[:len(self.inputs)], batch[-1] + +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ + model_type, tokenizer, evaluate): + from torch.utils.data import TensorDataset + + processor = transformers.glue_processors[task]() + output_mode = transformers.glue_output_modes[task] + # Load data features from cache or dataset file + if not os.path.exists("./dataset_cached"): + os.makedirs("./dataset_cached") + cached_features_file = os.path.join("./dataset_cached", "cached_{}_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, model_name_or_path.split("/"))).pop(), + str(max_seq_length), + str(task))) + if os.path.exists(cached_features_file): + logger.info("Load features from cached file {}.".format(cached_features_file)) + features = torch.load(cached_features_file) + else: + logger.info("Create features from dataset file at {}.".format(data_dir)) + label_list = processor.get_labels() + examples = processor.get_dev_examples(data_dir) if evaluate else \ + processor.get_train_examples(data_dir) + features = convert_examples_to_features(examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, + ) + logger.info("Save features into cached file {}.".format(cached_features_file)) + torch.save(features, cached_features_file) + # Convert to Tensors and build dataset + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) + all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) + all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) + if output_mode == "classification": + all_labels = torch.tensor([f.label for f in features], dtype=torch.long) + elif output_mode == "regression": + all_labels = torch.tensor([f.label for f in features], dtype=torch.float) + dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ + all_seq_lengths, all_labels) + return dataset + +def convert_examples_to_features( + examples, + tokenizer, + max_length=128, + task=None, + label_list=None, + output_mode="classification", + pad_token=0, + pad_token_segment_id=0, + mask_padding_with_zero=True, +): + processor = transformers.glue_processors[task]() + if label_list is None: + label_list = processor.get_labels() + logger.info("Use label 
list {} for task {}.".format(label_list, task)) + label_map = {label: i for i, label in enumerate(label_list)} + features = [] + for (ex_index, example) in enumerate(examples): + inputs = tokenizer.encode_plus( + example.text_a, + example.text_b, + add_special_tokens=True, + max_length=max_length, + return_token_type_ids=True, + truncation=True, + ) + input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) + + # Zero-pad up to the sequence length. + seq_length = len(input_ids) + padding_length = max_length - len(input_ids) + + input_ids = input_ids + ([pad_token] * padding_length) + attention_mask = attention_mask + \ + ([0 if mask_padding_with_zero else 1] * padding_length) + token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) + + assert len(input_ids) == max_length, \ + "Error with input_ids length {} vs {}".format( + len(input_ids), max_length) + assert len(attention_mask) == max_length, \ + "Error with attention_mask length {} vs {}".format( + len(attention_mask), max_length + ) + assert len(token_type_ids) == max_length, \ + "Error with token_type_ids length {} vs {}".format( + len(token_type_ids), max_length + ) + if output_mode == "classification": + label = label_map[example.label] + elif output_mode == "regression": + label = float(example.label) + else: + raise KeyError(output_mode) + + feats = InputFeatures( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + label=label, + seq_length=seq_length, + ) + features.append(feats) + return features + +@dataclass(frozen=True) +class InputFeatures: + """ + A single set of features of data. + Property names are the same names as the corresponding inputs to a model. + Args: + input_ids: Indices of input sequence tokens in the vocabulary. + attention_mask: Mask to avoid performing attention on padding token indices. + Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, + ``0`` for MASKED (padded) tokens. + token_type_ids: (Optional) Segment token indices to indicate first and second + portions of the inputs. Only some models use them. + label: (Optional) Label corresponding to the input. Int for classification problems, + float for regression problems. + seq_length: (Optional) The length of input sequence before padding. + """ + + input_ids: List[int] + attention_mask: Optional[List[int]] = None + token_type_ids: Optional[List[int]] = None + label: Optional[Union[int, float]] = None + seq_length: Optional[List[int]] = None + +class ONNXRTGLUE: + """Computes GLUE score. + + Args: + task (str, default=mrpc): The name of the task. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. 
+ + """ + def __init__(self, task="mrpc"): + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + self.pred_list = None + self.label_list = None + self.task = task + self.return_key = { + "cola": "mcc", + "mrpc": "acc", + "sts-b": "corr", + "qqp": "acc", + "mnli": "mnli/acc", + "qnli": "acc", + "rte": "acc", + "wnli": "acc", + "sst-2": "acc" + } + + def update(self, preds, labels): + """add preds and labels to storage""" + if isinstance(preds, list) and len(preds) == 1: + preds = preds[0] + if isinstance(labels, list) and len(labels) == 1: + labels = labels[0] + if self.pred_list is None: + self.pred_list = preds + self.label_list = labels + else: + self.pred_list = np.append(self.pred_list, preds, axis=0) + self.label_list = np.append(self.label_list, labels, axis=0) + + def reset(self): + """clear preds and labels storage""" + self.pred_list = None + self.label_list = None + + def result(self): + """calculate metric""" + output_mode = transformers.glue_output_modes[self.task] + + if output_mode == "classification": + processed_preds = np.argmax(self.pred_list, axis=1) + elif output_mode == "regression": + processed_preds = np.squeeze(self.pred_list) + result = transformers.glue_compute_metrics(\ + self.task, processed_preds, self.label_list) + return result[self.return_key[self.task]] + +if __name__ == "__main__": + logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") + parser = argparse.ArgumentParser( + description="BERT fine-tune examples for classification/regression tasks.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--model_path", + type=str, + help="Pre-trained model on onnx file" + ) + parser.add_argument( + "--benchmark", + action="store_true", \ + default=False + ) + parser.add_argument( + "--tune", + action="store_true", \ + default=False, + help="whether quantize the model" + ) + parser.add_argument( + "--output_model", + type=str, + help="output model path" + ) + parser.add_argument( + "--mode", + type=str, + help="benchmark mode of performance or accuracy" + ) + parser.add_argument( + "--model_name_or_path", + type=str, + help="pretrained model name or path" + ) + parser.add_argument( + "--data_path", + type=str, + help="input data path" + ) + parser.add_argument( + "--batch_size", + default=8, + type=int, + ) + parser.add_argument( + "--task", + type=str, + default="mrpc", + choices=["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], + help="GLUE task name" + ) + parser.add_argument( + "--quant_format", + type=str, + default="QOperator", + choices=["QDQ", "QOperator"], + help="quantization format" + ) + parser.add_argument( + "--dynamic_length", + type=bool, + default=False, + help="dynamic length" + ) + parser.add_argument( + "--max_seq_length", + type=int, + default=128, + help="max sequence length" + ) + parser.add_argument( + "--model_type", + type=str, + default="bert", + choices=["distilbert", "bert", "mobilebert", "roberta"], + help="model type" + ) + args = parser.parse_args() + + dataset = ONNXRTBertDataset(args.model_path, + data_dir=args.data_path, + model_name_or_path=args.model_name_or_path, + max_seq_length=args.max_seq_length, + task=args.task, + model_type=args.model_type, + dynamic_length=args.dynamic_length) + dataloader = DefaultDataLoader(dataset, args.batch_size) + metric = ONNXRTGLUE(args.task) + + def eval_func(model): + metric.reset() + session = 
onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + inputs = inputs[:len_inputs] + for i in range(len_inputs): + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + metric.update(predictions[0], labels) + return metric.result() + + if args.benchmark: + model = onnx.load(args.model_path) + if args.mode == "performance": + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == "accuracy": + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + if onnxruntime.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions("bert") + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + args.model_path, + "bert", + num_heads=4, + hidden_size=512, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(args.model_path) + + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach="static", + quant_format=args.quant_format, + recipes={"optypes_to_exclude_output_quant": ["MatMul"]}) + q_model = quantization.fit(model, + config, + eval_func=eval_func, + calib_dataloader=dataloader) + q_model.save(args.output_model) + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_data.sh b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_data.sh new file mode 100644 index 00000000000..8e434a5c521 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + download_data + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --data_dir=*) + data_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function download_data { + wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py + python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_model.sh b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_model.sh new file mode 100644 index 00000000000..8d6eb064930 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_model.sh @@ -0,0 +1,39 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + export_model + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_dir=*) + input_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + --max_len=*) + max_len=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function export_model { + 
python export.py --input_dir ${input_dir} --task_name ${task_name} --output_model ${output_model} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..1fb753da72e --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/requirements.txt @@ -0,0 +1,7 @@ +torch +transformers +onnx +onnxruntime +coloredlogs +sympy +onnxruntime-extensions; python_version < '3.10' diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_benchmark.sh new file mode 100644 index 00000000000..cc58c5c322b --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,62 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [[ ${mode} == "accuracy" ]]; then + dynamic_length=False + elif [[ ${mode} == "performance" ]]; then + dynamic_length=True + else + echo "Error: No such mode: ${mode}" + exit 1 + fi + + model_name_or_path="google/mobilebert-uncased" + task_name="mrpc" + model_type="mobilebert" + + python main.py \ + --model_path ${input_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --model_type ${model_type} \ + --mode ${mode} \ + --dynamic_length ${dynamic_length} \ + --benchmark + +} + +main "$@" + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..94045bad9e4 --- /dev/null +++ b/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,53 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + model_name_or_path="google/mobilebert-uncased" + batch_size=8 + task_name="mrpc" + model_type="mobilebert" + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --quant_format ${quant_format} \ + --model_type ${model_type} \ + --tune +} + +main "$@" + + + diff --git a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/README.md b/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/README.md similarity index 83% rename from examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/README.md rename to examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/README.md index fe8cc802109..567bbf71ea6 100644 --- 
a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/README.md +++ b/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/README.md @@ -23,15 +23,23 @@ wget https://github.com/onnx/models/raw/main/text/machine_comprehension/bidirect ## 3. Prepare Dataset Download SQuAD dataset from [SQuAD dataset link](https://rajpurkar.github.io/SQuAD-explorer/). +Dataset directories: + +```bash +squad +├── dev-v1.1.json +└── train-v1.1.json +``` + # Run ## 1. Quantization -Quantize model with dynamic quantization: +Dynamic quantization: ```bash bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=path/to/squad_v1/dev-v1.1.json + --dataset_location=path/to/squad/dev-v1.1.json --output_model=path/to/model_tune ``` @@ -39,6 +47,6 @@ bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx ```bash bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=path/to/squad_v1/dev-v1.1.json + --dataset_location=path/to/squad/dev-v1.1.json --mode=performance # or accuracy ``` diff --git a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/main.py b/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/main.py similarity index 98% rename from examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/main.py rename to examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/main.py index 3eef3c68151..30391852c2d 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/main.py +++ b/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/main.py @@ -158,7 +158,8 @@ def result(self): def eval_func(model): metric.reset() - session = ort.InferenceSession(model.SerializeToString(), None) + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] diff --git a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/requirements.txt b/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/requirements.txt similarity index 94% rename from examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/requirements.txt rename to examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/requirements.txt index 203bd54a0ba..a3beb86afbf 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/requirements.txt +++ b/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/requirements.txt @@ -4,3 +4,4 @@ coloredlogs sympy onnxruntime-extensions; python_version < '3.10' nltk +tqdm diff --git a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/run_benchmark.sh b/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/run_benchmark.sh rename to examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/run_tuning.sh b/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_tuning.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq/run_tuning.sh rename to examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_tuning.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/README.md 
b/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/README.md similarity index 85% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/README.md rename to examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/README.md index ad1fc643a0c..1746a09f5b7 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/README.md +++ b/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/README.md @@ -30,23 +30,31 @@ wget https://github.com/onnx/models/raw/main/text/machine_comprehension/bert-squ ## 3. Prepare Dataset Download SQuAD dataset from [SQuAD dataset link](https://rajpurkar.github.io/SQuAD-explorer/). +Dataset directories: + +```bash +squad +├── dev-v1.1.json +└── train-v1.1.json +``` + # Run ## 1. Quantization -Quantize model with dynamic quantization: +Dynamic quantization: ```bash bash run_tuning.sh --input_model=/path/to/model \ # model path as *.onnx --output_model=/path/to/model_tune \ - --dataset_location=/path/to/SQuAD/dataset + --dataset_location=/path/to/squad ``` ## 2. Benchmark ```bash bash run_benchmark.sh --input_model=/path/to/model \ # model path as *.onnx - --dataset_location=/path/to/SQuAD/dataset \ + --dataset_location=/path/to/squad \ --batch_size=batch_size \ --mode=performance # or accuracy ``` diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/main.py b/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/main.py similarity index 99% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/main.py rename to examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/main.py index 60aa2fca259..8c241347cde 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/main.py +++ b/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/main.py @@ -120,7 +120,6 @@ def eval_func(model): if args.tune: from neural_compressor import quantization, PostTrainingQuantConfig - config = PostTrainingQuantConfig(approach='dynamic') q_model = quantization.fit(model, config, diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/requirements.txt b/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/requirements.txt rename to examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/requirements.txt diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/run_benchmark.sh b/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/run_benchmark.sh rename to examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/run_onnx_squad.py b/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_onnx_squad.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/run_onnx_squad.py rename to examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_onnx_squad.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/run_tuning.sh b/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_tuning.sh similarity index 100% rename from 
examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/run_tuning.sh rename to examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_tuning.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/squad_evaluate.py b/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/squad_evaluate.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/squad_evaluate.py rename to examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/squad_evaluate.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/tokenization.py b/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/tokenization.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq/tokenization.py rename to examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/tokenization.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/README.md b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/README.md similarity index 77% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/README.md rename to examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/README.md index 1e60257e9cd..bf50643272a 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/README.md +++ b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/README.md @@ -24,22 +24,31 @@ python export.py ## 3. Prepare Dataset Please download [WikiText-2 dataset](https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip). +Dataset directories: + +```bash +wikitext-2-raw +├── wiki.test.raw +├── wiki.train.raw +└── wiki.valid.raw +``` + # Run ## 1. Quantization -Quantize model with dynamic quantization: +Dynamic quantization: ```bash -bash run_tuning.sh --dataset_location=/path/to/wikitext-2-raw/ \ +bash run_tuning.sh --dataset_location=/path/to/wikitext-2-raw/wiki.test.raw \ --input_model=path/to/model \ # model path as *.onnx - --output_model=path/to/model_tune + --output_model=path/to/model_tune # model path as *.onnx ``` ## 2. 
Benchmark ```bash -bash run_benchmark.sh --dataset_location=/path/to/wikitext-2-raw/ \ +bash run_benchmark.sh --dataset_location=/path/to/wikitext-2-raw/wiki.test.raw \ --input_model=path/to/model \ # model path as *.onnx --batch_size=batch_size \ --mode=performance # or accuracy diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/export.py b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/export.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/export.py rename to examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/export.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/gpt2.py b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py similarity index 86% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/gpt2.py rename to examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py index 4ea6fa269db..f861724dea8 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/gpt2.py +++ b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py @@ -86,7 +86,10 @@ def __len__(self): return len(self.examples) def __getitem__(self, item): - return torch.tensor(self.examples[item]) + inputs = torch.tensor(self.examples[item]) + inputs = np.array(inputs) + inputs = np.expand_dims(inputs, axis=0) + return inputs, inputs def load_and_cache_examples(args, tokenizer, evaluate=False): dataset = TextDataset(tokenizer, args, file_path=args.data_path, block_size=args.block_size) @@ -95,14 +98,9 @@ def load_and_cache_examples(args, tokenizer, evaluate=False): def evaluate(args, model, tokenizer, prefix=""): eval_dataset = load_and_cache_examples(args, tokenizer, evaluate=True) - args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu) eval_sampler = SequentialSampler(eval_dataset) eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) - # multi-gpu evaluate - if args.n_gpu > 1: - model = torch.nn.DataParallel(model) - # Eval! logger.info("***** Running evaluation {} *****".format(prefix)) logger.info(" Num examples = %d", len(eval_dataset)) @@ -120,15 +118,13 @@ def evaluate(args, model, tokenizer, prefix=""): inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] ort_inputs = {} - for idx, batch in enumerate(tqdm(eval_dataloader, desc="Evaluating")): + for idx, (inputs, labels) in enumerate(tqdm(eval_dataloader, desc="Evaluating")): if nb_eval_steps >= args.warmup_steps: start = timeit.default_timer() - inputs, labels = (batch, batch) inputs = inputs.to(args.device) labels = labels.to(args.device) for i in range(len_inputs): inputs = np.array(inputs) - inputs = np.expand_dims(inputs, axis=0) ort_inputs.update({inputs_names[i]: inputs}) predictions = session.run(None, ort_inputs) lm_logits = predictions[0] @@ -193,7 +189,7 @@ def main(): help="Optional input sequence length after tokenization." "The training dataset will be truncated in block of this size for training." 
"Default to the model max input length for single sentence inputs (take into account special tokens).") - parser.add_argument("--per_gpu_eval_batch_size", default=1, type=int, + parser.add_argument("--eval_batch_size", default=1, type=int, help="Batch size per GPU/CPU for evaluation.") parser.add_argument('--overwrite_cache', action='store_true', help="Overwrite the cached training and evaluation sets") @@ -213,8 +209,7 @@ def main(): help='For accuracy measurement only.') args = parser.parse_args() - device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") - args.n_gpu = torch.cuda.device_count() + device = torch.device("cpu") args.device = device # Setup logging @@ -241,25 +236,37 @@ def eval_func(model): return evaluate(args, model, tokenizer) if args.benchmark: - evaluate(args, model, tokenizer) + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + from neural_compressor.data.dataloaders.onnxrt_dataloader import DefaultDataLoader + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + b_dataloader = DefaultDataLoader(ds, args.eval_batch_size) + fit(model, conf, b_dataloader=b_dataloader) + else: + evaluate(args, model, tokenizer) if args.tune: - # GPT2 optimizer - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.onnx_model_bert import BertOptimizationOptions - opt_options = BertOptimizationOptions('gpt2') - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - args.model_path, - 'gpt2', - num_heads=12, - hidden_size=768, - optimization_options=opt_options) - model = model_optimizer.model - - from neural_compressor import quantization, PostTrainingQuantConfig - from neural_compressor.config import AccuracyCriterion + if ort.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions('gpt2') + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + args.model_path, + 'gpt2', + num_heads=12, + hidden_size=768, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(args.model_path) + + from neural_compressor import quantization + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig accuracy_criterion = AccuracyCriterion() accuracy_criterion.higher_is_better = False accuracy_criterion.relative = 0.11 diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/requirements.txt b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/requirements.txt similarity index 95% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/requirements.txt rename to examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/requirements.txt index e7071dde892..0a0cd437a53 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/requirements.txt +++ b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/requirements.txt @@ -5,3 +5,4 @@ coloredlogs torch sympy onnxruntime-extensions; python_version < '3.10' +tqdm diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/run_benchmark.sh b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_benchmark.sh similarity index 88% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/run_benchmark.sh rename to 
examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_benchmark.sh index 8198c3a9fa7..d8df42a2232 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/run_benchmark.sh +++ b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_benchmark.sh @@ -54,12 +54,11 @@ function define_mode { function run_benchmark { model_type='gpt2' model_name_or_path='gpt2' - test_data='wiki.test.raw' python gpt2.py --model_path ${input_model} \ - --data_path ${dataset_location}${test_data} \ + --data_path ${dataset_location} \ --model_type ${model_type} \ --model_name_or_path ${model_name_or_path} \ - --per_gpu_eval_batch_size ${batch_size} \ + --eval_batch_size ${batch_size} \ --benchmark \ --mode ${mode} \ ${mode_cmd} diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/run_tuning.sh b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_tuning.sh similarity index 88% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/run_tuning.sh rename to examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_tuning.sh index 4785027fcb6..426fce9b9ba 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq/run_tuning.sh +++ b/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_tuning.sh @@ -29,9 +29,8 @@ function init_params { function run_tuning { model_type='gpt2' model_name_or_path='gpt2' - test_data='wiki.test.raw' python gpt2.py --model_path ${input_model} \ - --data_path ${dataset_location}${test_data} \ + --data_path ${dataset_location} \ --model_type ${model_type} \ --model_name_or_path ${model_name_or_path} \ --tune \ diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/README.md b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/README.md similarity index 84% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/README.md rename to examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/README.md index 548e3f95501..efa5703a598 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/README.md +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/README.md @@ -32,11 +32,19 @@ python -m tf2onnx.convert --opset 11 --tflite mobilebert_float_384_20200602.tfli ## 3. Prepare Dataset Download SQuAD dataset from [SQuAD dataset link](https://rajpurkar.github.io/SQuAD-explorer/). +Dataset directories: + +```bash +squad +├── dev-v1.1.json +└── train-v1.1.json +``` + # Run ## 1. Quantization -Quantize model with dynamic quantization: +Dynamic quantization: ```bash bash run_tuning.sh --input_model=/path/to/model \ # model path as *.onnx @@ -44,15 +52,6 @@ bash run_tuning.sh --input_model=/path/to/model \ # model path as *.onnx --dataset_location=/path/to/SQuAD/dataset ``` -Quantize model with QDQ mode: - -```bash -bash run_tuning.sh --input_model=/path/to/model \ # model path as *.onnx - --output_model=/path/to/model_tune \ - --dataset_location=/path/to/SQuAD/dataset \ - --quant_format='QDQ' -``` - ## 2. 
Benchmark ```bash diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/main.py new file mode 100644 index 00000000000..ff6fa8a6ca0 --- /dev/null +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/main.py @@ -0,0 +1,163 @@ +import numpy as np +import onnxruntime +import onnx +import tokenization +import os +from run_onnx_squad import * +import json +from run_onnx_squad import read_squad_examples, convert_examples_to_features, write_predictions +from torch.utils.data import Dataset +from torch.utils.data import DataLoader +import tqdm +from squad_evaluate import evaluate + +max_seq_length = 384 +doc_stride = 128 +max_query_length = 64 +n_best_size = 20 +max_answer_length = 30 + +def parse_dummy_input(model, benchmark_nums, max_seq_length): + session = onnxruntime.InferenceSession(model.SerializeToString(), None, + providers=onnxruntime.get_available_providers()) + shapes = [] + lows = [] + highs = [] + for i in range(len(session.get_inputs())): + input_name = session.get_inputs()[i].name + input_shapes = session.get_inputs()[i].shape + shape = [benchmark_nums] + shape.append(max_seq_length) + if input_name == "input_ids": + low = 0.0 + high = 1000.0 + else: + low = 0.0 + high = 2.0 + shapes.append(tuple(shape)) + lows.append(low) + highs.append(high) + return shapes, lows, highs + +class squadDataset(Dataset): + def __init__(self, input_ids, input_mask, segment_ids, bs): + self.input_ids = input_ids + self.input_mask = input_mask + self.segment_ids = segment_ids + self.bs = bs + + def __getitem__(self, index): + return (self.input_ids[index:index + self.bs][0], self.input_mask[index:index + self.bs][0], self.segment_ids[index:index + self.bs][0]), 0 + + def __len__(self): + assert len(self.input_ids) == len(self.input_mask) + assert len(self.input_ids) == len(self.segment_ids) + return len(self.input_ids) + +def evaluate_squad(model, dataloader, input_ids, eval_examples, extra_data, input_file): + session = onnxruntime.InferenceSession(model.SerializeToString(), None, + providers=onnxruntime.get_available_providers()) + for output_meta in session.get_outputs(): + print(output_meta) + for input_meta in session.get_inputs(): + print(input_meta) + n = len(input_ids) + bs = 1 + all_results = [] + start = timer() + for idx, (batch, label) in tqdm.tqdm(enumerate(dataloader), desc="eval"): + data = {"input_ids": np.array(batch[0]), + "input_mask": np.array(batch[1]), + "segment_ids": np.array(batch[2])} + result = session.run(["end_logits","start_logits"], data) + in_batch = result[0].shape[0] + start_logits = [float(x) for x in result[1][0].flat] + end_logits = [float(x) for x in result[0][0].flat] + for i in range(0, in_batch): + unique_id = len(all_results) + all_results.append(RawResult(unique_id=unique_id, start_logits=start_logits,end_logits=end_logits)) + + # postprocessing + output_dir = './output' + os.makedirs(output_dir, exist_ok=True) + output_prediction_file = os.path.join(output_dir, "predictions_mobilebert_fp32.json") + output_nbest_file = os.path.join(output_dir, "nbest_predictions_mobilebert_fp32.json") + write_predictions(eval_examples, extra_data, all_results, + n_best_size, max_answer_length, + True, output_prediction_file, output_nbest_file) + + with open(input_file) as dataset_file: + dataset_json = json.load(dataset_file) + expected_version = '1.1' + if (dataset_json['version'] != expected_version): + print('Evaluation expects v-' + 
expected_version + + ', but got dataset with v-' + dataset_json['version'], + file=sys.stderr) + dataset = dataset_json['data'] + with open(output_prediction_file) as prediction_file: + predictions = json.load(prediction_file) + res = evaluate(dataset, predictions) + return res['f1'] + +def main(): + parser = argparse.ArgumentParser(description='onnx squad') + parser.add_argument('--model_path', required=True, type=str, + help='model path') + parser.add_argument('--save_path', type=str, default='mobilbert_tune.onnx', + help='save tuned model path') + parser.add_argument('--data_path', type=str, + help='datseset path') + parser.add_argument('--tune', action='store_true', default=False, + help='run neural_compressor tune') + parser.add_argument('--benchmark', action='store_true', default=False, + help='run benchmark') + parser.add_argument('--mode', type=str, default='performance', + help="benchmark mode of performance or accuracy") + parser.add_argument('--benchmark_nums', type=int, default=1000, + help="Benchmark numbers of samples") + parser.add_argument('--batch_size', type=int, default=1, + help="batch size for benchmark") + args = parser.parse_args() + + model = onnx.load(args.model_path) + + predict_file = 'dev-v1.1.json' + input_file=os.path.join(args.data_path, predict_file) + eval_examples = read_squad_examples(input_file=input_file) + + vocab_file = os.path.join('uncased_L-12_H-768_A-12', 'vocab.txt') + tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True) + input_ids, input_mask, segment_ids, extra_data = convert_examples_to_features(eval_examples, tokenizer, + max_seq_length, doc_stride, max_query_length) + + dataset = squadDataset(input_ids, input_mask, segment_ids, 1) + eval_dataloader = DataLoader(dataset, batch_size=args.batch_size) + + def eval_func(model): + return evaluate_squad(model, eval_dataloader, input_ids, eval_examples, extra_data, input_file) + + if args.tune: + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach='dynamic', + calibration_sampling_size=[8]) + q_model = quantization.fit(model, + config, + eval_func=eval_func) + q_model.save(args.save_path) + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=7) + fit(model, conf, b_dataloader=eval_dataloader) + elif args.mode == 'accuracy': + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + +if __name__ == "__main__": + main() diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/requirements.txt b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/requirements.txt similarity index 97% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/requirements.txt rename to examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/requirements.txt index 3611f3a20b1..ea5a4dfb3f4 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/requirements.txt +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/requirements.txt @@ -5,3 +5,4 @@ tf2onnx torch onnxruntime-extensions; python_version < '3.10' pillow>=8.1.0 # not directly required, pinned by Snyk to avoid a vulnerability +tqdm diff --git 
a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_benchmark.sh b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_benchmark.sh new file mode 100644 index 00000000000..5bafd49e6ce --- /dev/null +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + batch_size=1 + python main.py \ + --model_path ${input_model} \ + --mode ${mode} \ + --data_path ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark + +} + +main "$@" diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/run_onnx_squad.py b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_onnx_squad.py similarity index 99% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/run_onnx_squad.py rename to examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_onnx_squad.py index a4ecda0003b..076e1fd29ef 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/run_onnx_squad.py +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_onnx_squad.py @@ -538,7 +538,9 @@ def main(): convert_examples_to_features(eval_examples, tokenizer, args.max_seq_length, args.doc_stride, args.max_query_length) - sess = onnxrt.InferenceSession(args.model, sess_options) + sess = onnxrt.InferenceSession(args.model, + sess_options, + providers=onnxrt.get_available_providers()) for input_meta in sess.get_inputs(): print(input_meta) n = len(input_ids) diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/run_tuning.sh b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_tuning.sh similarity index 93% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/run_tuning.sh rename to examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_tuning.sh index d254d7a0470..7b955833a28 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/run_tuning.sh +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_tuning.sh @@ -16,12 +16,12 @@ function init_params { --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; - --dataset_location=*) - dataset_location=$(echo $var |cut -f2 -d=) - ;; --output_model=*) output_model=$(echo $var |cut -f2 -d=) ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; esac done @@ -31,7 +31,7 @@ function init_params { function run_tuning { python main.py \ --model_path ${input_model} \ - --output_model ${output_model} \ + --save_path ${output_model} \ --data_path ${dataset_location} \ --tune } diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/squad_evaluate.py b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/squad_evaluate.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/squad_evaluate.py rename to examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/squad_evaluate.py diff --git 
a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/tokenization.py b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/tokenization.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/tokenization.py rename to examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/tokenization.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/README.md b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/README.md new file mode 100644 index 00000000000..3c68eb75b37 --- /dev/null +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/README.md @@ -0,0 +1,63 @@ +Step-by-Step +============ + +This example loads the MobileBERT model fine-tuned for question answering and confirms its accuracy and speed on the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) task. + +# Prerequisite + +## 1. Environment +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Model + +Download the pretrained BERT model; only its `vocab.txt` file is used in this example. + +```bash +wget https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip +unzip uncased_L-12_H-768_A-12.zip +``` + +Download the MLPerf MobileBERT model and convert it to an ONNX model with the [tf2onnx](https://github.com/onnx/tensorflow-onnx) tool. + +```bash +wget https://github.com/fatihcakirs/mobile_models/raw/main/v0_7/tflite/mobilebert_float_384_20200602.tflite + +python -m tf2onnx.convert --opset 11 --tflite mobilebert_float_384_20200602.tflite --output mobilebert_SQuAD.onnx +``` + +## 3. Prepare Dataset +Download SQuAD dataset from [SQuAD dataset link](https://rajpurkar.github.io/SQuAD-explorer/). + +Dataset directories: + +```bash +squad +├── dev-v1.1.json +└── train-v1.1.json +``` + +# Run + +## 1. Quantization + +Static quantization with QDQ format: + +```bash +bash run_tuning.sh --input_model=/path/to/model \ # model path as *.onnx + --output_model=/path/to/model_tune \ + --dataset_location=/path/to/squad \ + --quant_format='QDQ' +``` + +## 2. 
Benchmark + +```bash +bash run_benchmark.sh --input_model=/path/to/model \ # model path as *.onnx + --dataset_location=/path/to/squad \ + --batch_size=batch_size \ + --mode=performance # or accuracy +``` diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/main.py b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/main.py similarity index 93% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/main.py rename to examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/main.py index ffe7334b97e..21d072fc661 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/main.py +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/main.py @@ -116,7 +116,7 @@ def main(): parser.add_argument('--benchmark_nums', type=int, default=1000, help="Benchmark numbers of samples") parser.add_argument('--quant_format', type=str, default='Default', - choices=['default', 'QDQ'], + choices=['QOperator', 'QDQ'], help="quantization format") parser.add_argument('--batch_size', type=int, default=1, help="batch size for benchmark") @@ -141,13 +141,10 @@ def eval_func(model): if args.tune: from neural_compressor import quantization, PostTrainingQuantConfig - if args.quant_format == 'QDQ': - config = PostTrainingQuantConfig(approach='static', - calibration_sampling_size=[8], - quant_format=args.quant_format) - else: - config = PostTrainingQuantConfig(approach='dynamic', - calibration_sampling_size=[8]) + config = PostTrainingQuantConfig(approach='static', + calibration_sampling_size=[8], + quant_format=args.quant_format) + + q_model = quantization.fit(model, config, calib_dataloader=eval_dataloader, diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..ea5a4dfb3f4 --- /dev/null +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/requirements.txt @@ -0,0 +1,8 @@ +onnx +onnxruntime +intel-tensorflow==2.10.0 +tf2onnx +torch +onnxruntime-extensions; python_version < '3.10' +pillow>=8.1.0 # not directly required, pinned by Snyk to avoid a vulnerability +tqdm diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_benchmark.sh new file mode 100644 index 00000000000..5bafd49e6ce --- /dev/null +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,42 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + batch_size=1 + python main.py \ + --model_path ${input_model} \ + --mode ${mode} \ + --data_path ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark + +} + +main "$@" diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_onnx_squad.py b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_onnx_squad.py new file mode 100644 index 00000000000..076e1fd29ef --- /dev/null +++ 
b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_onnx_squad.py @@ -0,0 +1,581 @@ +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Inference for squad/bert using onnx. + +This is going to do the samem as 'python run_squad.py --do_predict=True ...' using a squad/bert model +that was converted to onnx. Lots of code was taken from run_squad.py. +You run it with: + + +python onnx_squad.py --model $SQUAD_MODEL/squad.onnx \ + --vocab_file $BERT_BASE_DIR/uncased_L-12_H-768_A-12/vocab.txt + --predict_file $SQUAD_DATA/dev-v1.1.json \ + --bert_config_file $BERT_BASE_DIR/uncased_L-12_H-768_A-12/bert_config.json \ + --output /tmp/ +""" + +import argparse +import collections +import json +import logging +import math +import os +import sys +from timeit import default_timer as timer + +import numpy as np +import onnxruntime as onnxrt +import six +import tokenization + + +RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"]) + +Feature = collections.namedtuple("Feature", ["unique_id", "tokens", "example_index", + "token_to_orig_map", "token_is_max_context"]) + + +class SquadExample(object): + """A single training/test example for simple sequence classification.""" + + def __init__(self, + qas_id, + question_text, + doc_tokens, + orig_answer_text=None, + start_position=None, + end_position=None): + self.qas_id = qas_id + self.question_text = question_text + self.doc_tokens = doc_tokens + self.orig_answer_text = orig_answer_text + self.start_position = start_position + self.end_position = end_position + + def __str__(self): + return self.__repr__() + + def __repr__(self): + s = [] + s.append("qas_id: %s" % (tokenization.printable_text(self.qas_id))) + s.append("question_text: %s" % (tokenization.printable_text(self.question_text))) + s.append("doc_tokens: [%s]" % (" ".join(self.doc_tokens))) + if self.start_position: + s.append("start_position: %d" % (self.start_position)) + if self.start_position: + s.append("end_position: %d" % (self.end_position)) + return ", ".join(s) + + +def _check_is_max_context(doc_spans, cur_span_index, position): + """Check if this is the 'max context' doc span for the token.""" + + # Because of the sliding window approach taken to scoring documents, a single + # token can appear in multiple documents. E.g. + # Doc: the man went to the store and bought a gallon of milk + # Span A: the man went to the + # Span B: to the store and bought + # Span C: and bought a gallon of + # ... + # + # Now the word 'bought' will have two scores from spans B and C. We only + # want to consider the score with "maximum context", which we define as + # the *minimum* of its left and right context (the *sum* of left and + # right context will always be the same, of course). + # + # In the example the maximum context for 'bought' would be span C since + # it has 1 left context and 3 right context, while span B has 4 left context + # and 0 right context. 
+ best_score = None + best_span_index = None + for (span_index, doc_span) in enumerate(doc_spans): + end = doc_span.start + doc_span.length - 1 + if position < doc_span.start: + continue + if position > end: + continue + num_left_context = position - doc_span.start + num_right_context = end - position + score = min(num_left_context, num_right_context) + 0.01 * doc_span.length + if best_score is None or score > best_score: + best_score = score + best_span_index = span_index + + return cur_span_index == best_span_index + + +def convert_examples_to_features(examples, tokenizer, max_seq_length, doc_stride, max_query_length): + """Loads a data file into a list of `InputBatch`s.""" + + res_input_ids = [] + res_input_mask = [] + res_segment_ids = [] + extra = [] + unique_id = 0 + + for (example_index, example) in enumerate(examples): + query_tokens = tokenizer.tokenize(example.question_text) + + if len(query_tokens) > max_query_length: + query_tokens = query_tokens[0:max_query_length] + + tok_to_orig_index = [] + orig_to_tok_index = [] + all_doc_tokens = [] + for (i, token) in enumerate(example.doc_tokens): + orig_to_tok_index.append(len(all_doc_tokens)) + sub_tokens = tokenizer.tokenize(token) + for sub_token in sub_tokens: + tok_to_orig_index.append(i) + all_doc_tokens.append(sub_token) + + tok_start_position = None + tok_end_position = None + # The -3 accounts for [CLS], [SEP] and [SEP] + max_tokens_for_doc = max_seq_length - len(query_tokens) - 3 + + # We can have documents that are longer than the maximum sequence length. + # To deal with this we do a sliding window approach, where we take chunks + # of the up to our max length with a stride of `doc_stride`. + _DocSpan = collections.namedtuple("DocSpan", ["start", "length"]) + doc_spans = [] + start_offset = 0 + while start_offset < len(all_doc_tokens): + length = len(all_doc_tokens) - start_offset + if length > max_tokens_for_doc: + length = max_tokens_for_doc + doc_spans.append(_DocSpan(start=start_offset, length=length)) + if start_offset + length == len(all_doc_tokens): + break + start_offset += min(length, doc_stride) + + for (doc_span_index, doc_span) in enumerate(doc_spans): + tokens = [] + token_to_orig_map = {} + token_is_max_context = {} + segment_ids = [] + tokens.append("[CLS]") + segment_ids.append(0) + for token in query_tokens: + tokens.append(token) + segment_ids.append(0) + tokens.append("[SEP]") + segment_ids.append(0) + + for i in range(doc_span.length): + split_token_index = doc_span.start + i + token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index] + + is_max_context = _check_is_max_context(doc_spans, doc_span_index, + split_token_index) + token_is_max_context[len(tokens)] = is_max_context + tokens.append(all_doc_tokens[split_token_index]) + segment_ids.append(1) + tokens.append("[SEP]") + segment_ids.append(1) + + input_ids = tokenizer.convert_tokens_to_ids(tokens) + + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + input_mask = [1] * len(input_ids) + + # Zero-pad up to the sequence length. 
+ while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + res_input_ids.append(np.array(input_ids, dtype=np.int32)) + res_input_mask.append(np.array(input_mask, dtype=np.int32)) + res_segment_ids.append(np.array(segment_ids, dtype=np.int32)) + feature = Feature(unique_id=unique_id, tokens=tokens, + example_index=example_index, token_to_orig_map=token_to_orig_map, + token_is_max_context=token_is_max_context) + extra.append(feature) + unique_id += 1 + return np.array(res_input_ids), np.array(res_input_mask), np.array(res_segment_ids), extra + + +def read_squad_examples(input_file): + """Read a SQuAD json file into a list of SquadExample.""" + with open(input_file, "r") as f: + input_data = json.load(f)["data"] + + def is_whitespace(c): + if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F: + return True + return False + + examples = [] + for idx, entry in enumerate(input_data): + for paragraph in entry["paragraphs"]: + paragraph_text = paragraph["context"] + doc_tokens = [] + char_to_word_offset = [] + prev_is_whitespace = True + for c in paragraph_text: + if is_whitespace(c): + prev_is_whitespace = True + else: + if prev_is_whitespace: + doc_tokens.append(c) + else: + doc_tokens[-1] += c + prev_is_whitespace = False + char_to_word_offset.append(len(doc_tokens) - 1) + + for qa in paragraph["qas"]: + qas_id = qa["id"] + question_text = qa["question"] + start_position = None + end_position = None + orig_answer_text = None + example = SquadExample( + qas_id=qas_id, + question_text=question_text, + doc_tokens=doc_tokens, + orig_answer_text=orig_answer_text, + start_position=start_position, + end_position=end_position) + examples.append(example) + return examples + + +def write_predictions(all_examples, all_features, all_results, n_best_size, + max_answer_length, do_lower_case, output_prediction_file, + output_nbest_file): + """Write final predictions to the json file.""" + example_index_to_features = collections.defaultdict(list) + for feature in all_features: + example_index_to_features[feature.example_index].append(feature) + + unique_id_to_result = {} + for result in all_results: + unique_id_to_result[result.unique_id] = result + + _PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name + "PrelimPrediction", + ["feature_index", "start_index", "end_index", "start_logit", "end_logit"]) + + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + for (example_index, example) in enumerate(all_examples): + features = example_index_to_features[example_index] + prelim_predictions = [] + for (feature_index, feature) in enumerate(features): + if not feature.unique_id in unique_id_to_result: + print("feature not in unique_Id", feature.unique_id) + continue + result = unique_id_to_result[feature.unique_id] + + start_indexes = _get_best_indexes(result.start_logits, n_best_size) + end_indexes = _get_best_indexes(result.end_logits, n_best_size) + for start_index in start_indexes: + for end_index in end_indexes: + # We could hypothetically create invalid predictions, e.g., predict + # that the start of the span is in the question. We throw out all + # invalid predictions. 
+ if start_index >= len(feature.tokens): + continue + if end_index >= len(feature.tokens): + continue + if start_index not in feature.token_to_orig_map: + continue + if end_index not in feature.token_to_orig_map: + continue + if not feature.token_is_max_context.get(start_index, False): + continue + if end_index < start_index: + continue + length = end_index - start_index + 1 + if length > max_answer_length: + continue + prelim_predictions.append( + _PrelimPrediction( + feature_index=feature_index, + start_index=start_index, + end_index=end_index, + start_logit=result.start_logits[start_index], + end_logit=result.end_logits[end_index])) + + prelim_predictions = sorted( + prelim_predictions, + key=lambda x: (x.start_logit + x.end_logit), + reverse=True) + + _NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name + "NbestPrediction", ["text", "start_logit", "end_logit"]) + + seen_predictions = {} + nbest = [] + for pred in prelim_predictions: + if len(nbest) >= n_best_size: + break + feature = features[pred.feature_index] + + tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)] + orig_doc_start = feature.token_to_orig_map[pred.start_index] + orig_doc_end = feature.token_to_orig_map[pred.end_index] + orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)] + tok_text = " ".join(tok_tokens) + + # De-tokenize WordPieces that have been split off. + tok_text = tok_text.replace(" ##", "") + tok_text = tok_text.replace("##", "") + + # Clean whitespace + tok_text = tok_text.strip() + tok_text = " ".join(tok_text.split()) + orig_text = " ".join(orig_tokens) + + final_text = get_final_text(tok_text, orig_text, do_lower_case) + if final_text in seen_predictions: + continue + + seen_predictions[final_text] = True + nbest.append( + _NbestPrediction( + text=final_text, + start_logit=pred.start_logit, + end_logit=pred.end_logit)) + + # In very rare edge cases we could have no valid predictions. So we + # just create a nonce prediction in this case to avoid failure. + if not nbest: + nbest.append( + _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0)) + + assert len(nbest) >= 1 + + total_scores = [] + for entry in nbest: + total_scores.append(entry.start_logit + entry.end_logit) + + probs = _compute_softmax(total_scores) + + nbest_json = [] + for (i, entry) in enumerate(nbest): + output = collections.OrderedDict() + output["text"] = entry.text + output["probability"] = probs[i] + output["start_logit"] = float(entry.start_logit) + output["end_logit"] = float(entry.end_logit) + nbest_json.append(output) + + all_predictions[example.qas_id] = nbest_json[0]["text"] + all_nbest_json[example.qas_id] = nbest_json + + with open(output_prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") + + with open(output_nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + "\n") + + +def get_final_text(pred_text, orig_text, do_lower_case): + """Project the tokenized prediction back to the original text.""" + + # When we created the data, we kept track of the alignment between original + # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So + # now `orig_text` contains the span of our original text corresponding to the + # span that we predicted. + # + # However, `orig_text` may contain extra characters that we don't want in + # our prediction. 
+ #
+ # For example, let's say:
+ # pred_text = steve smith
+ # orig_text = Steve Smith's
+ #
+ # We don't want to return `orig_text` because it contains the extra "'s".
+ #
+ # We don't want to return `pred_text` because it's already been normalized
+ # (the SQuAD eval script also does punctuation stripping/lower casing but
+ # our tokenizer does additional normalization like stripping accent
+ # characters).
+ #
+ # What we really want to return is "Steve Smith".
+ #
+ # Therefore, we have to apply a semi-complicated alignment heuristic between
+ # `pred_text` and `orig_text` to get a character-to-character alignment. This
+ # can fail in certain cases in which case we just return `orig_text`.
+
+ def _strip_spaces(text):
+ ns_chars = []
+ ns_to_s_map = collections.OrderedDict()
+ for (i, c) in enumerate(text):
+ if c == " ":
+ continue
+ ns_to_s_map[len(ns_chars)] = i
+ ns_chars.append(c)
+ ns_text = "".join(ns_chars)
+ return (ns_text, ns_to_s_map)
+
+ # We first tokenize `orig_text`, strip whitespace from the result
+ # and `pred_text`, and check if they are the same length. If they are
+ # NOT the same length, the heuristic has failed. If they are the same
+ # length, we assume the characters are one-to-one aligned.
+ tokenizer = tokenization.BasicTokenizer(do_lower_case=do_lower_case)
+
+ tok_text = " ".join(tokenizer.tokenize(orig_text))
+
+ start_position = tok_text.find(pred_text)
+ if start_position == -1:
+ return orig_text
+ end_position = start_position + len(pred_text) - 1
+
+ (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text)
+ (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text)
+
+ if len(orig_ns_text) != len(tok_ns_text):
+ return orig_text
+
+ # We then project the characters in `pred_text` back to `orig_text` using
+ # the character-to-character alignment.
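+ # Illustrative walkthrough (not part of the original comments): with
+ # pred_text = "john" and orig_text = "John's", the BasicTokenizer output is
+ # "john ' s"; after _strip_spaces both sides have length 6 ("John's" vs
+ # "john's"), so they are assumed to align one-to-one. Characters 0..3 of the
+ # prediction then map back to characters 0..3 of orig_text and the function
+ # returns "John" with its original casing.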
+ tok_s_to_ns_map = {} + for (i, tok_index) in six.iteritems(tok_ns_to_s_map): + tok_s_to_ns_map[tok_index] = i + + orig_start_position = None + if start_position in tok_s_to_ns_map: + ns_start_position = tok_s_to_ns_map[start_position] + if ns_start_position in orig_ns_to_s_map: + orig_start_position = orig_ns_to_s_map[ns_start_position] + + if orig_start_position is None: + return orig_text + + orig_end_position = None + if end_position in tok_s_to_ns_map: + ns_end_position = tok_s_to_ns_map[end_position] + if ns_end_position in orig_ns_to_s_map: + orig_end_position = orig_ns_to_s_map[ns_end_position] + + if orig_end_position is None: + return orig_text + + output_text = orig_text[orig_start_position:(orig_end_position + 1)] + return output_text + + +def _get_best_indexes(logits, n_best_size): + """Get the n-best logits from a list.""" + index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True) + best_indexes = [] + for i in range(len(index_and_score)): + if i >= n_best_size: + break + best_indexes.append(index_and_score[i][0]) + return best_indexes + + +def _compute_softmax(scores): + """Compute softmax probability over raw logits.""" + if not scores: + return [] + + max_score = None + for score in scores: + if max_score is None or score > max_score: + max_score = score + + exp_scores = [] + total_sum = 0.0 + for score in scores: + x = math.exp(score - max_score) + exp_scores.append(x) + total_sum += x + + probs = [] + for score in exp_scores: + probs.append(score / total_sum) + return probs + + +def main(): + parser = argparse.ArgumentParser(description='onnx squad') + parser.add_argument('--model', required=True, help='model') + parser.add_argument('--vocab_file', required=True, help='vocab_file') + parser.add_argument('--bert_config_file', help='vocab_file') + parser.add_argument('--predict_file', required=True, help='predict_file') + parser.add_argument('--output_dir', help='output dir') + parser.add_argument('--max_seq_length', type=int, default=256, help='max_seq_length') + parser.add_argument('--max_query_length', type=int, default=64, help='max_query_length') + parser.add_argument('--max_answer_length', type=int, default=30, help='max_answer_length') + parser.add_argument('--n_best_size', type=int, default=20, help='n_best_size') + parser.add_argument('--doc_stride', type=int, default=128, help='doc_stride') + parser.add_argument('--batch_size', type=int, default=1, help='batch_size') + parser.add_argument('--profile', action='store_true', help='enable chrome timeline trace profiling.') + parser.add_argument('--log', type=int, help='log level.') + args = parser.parse_args() + + sess_options = None + if args.profile: + sess_options = onnxrt.SessionOptions() + sess_options.enable_profiling = True + sess_options.profile_file_prefix = os.path.basename(args.model) + if args.log: + sess_options = onnxrt.SessionOptions() + sess_options.session_log_verbosity_level = args.log + + tokenizer = tokenization.FullTokenizer(vocab_file=args.vocab_file, do_lower_case=True) + + eval_examples = read_squad_examples(input_file=args.predict_file) + input_ids, input_mask, segment_ids, extra_data = \ + convert_examples_to_features(eval_examples, tokenizer, args.max_seq_length, + args.doc_stride, args.max_query_length) + + sess = onnxrt.InferenceSession(args.model, + sess_options, + providers=onnxrt.get_available_providers()) + for input_meta in sess.get_inputs(): + print(input_meta) + n = len(input_ids) + bs = args.batch_size + all_results = [] + start = timer() + for idx in 
range(0, n, bs): + data = {"input_ids": input_ids[idx:idx + bs], + "input_mask": input_mask[idx:idx + bs], + "segment_ids": segment_ids[idx:idx + bs]} + result = sess.run(["start_logits","end_logits"], data) + in_batch = result[0].shape[1] + start_logits = [float(x) for x in result[0][0].flat] + end_logits = [float(x) for x in result[1][0].flat] + for i in range(0, in_batch): + unique_id = len(all_results) + # all_results.append(RawResult(unique_id=unique_id, start_logits=result[0][0][i], end_logits=result[1][0][i])) + all_results.append(RawResult(unique_id=unique_id, start_logits=start_logits, end_logits=end_logits)) + if unique_id > 0 and unique_id % 10000 == 0: + print("at {} {}sec per item".format(unique_id, (timer() - start) / unique_id)) + end = timer() + + print("total time: {}sec, {}sec per item".format(end - start, (end - start) / len(all_results))) + + if args.output_dir: + output_prediction_file = os.path.join(args.output_dir, "predictions.json") + output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json") + write_predictions(eval_examples, extra_data, all_results, + args.n_best_size, args.max_answer_length, + True, output_prediction_file, output_nbest_file) + if args.profile: + trace_file = sess.end_profiling() + print("trace file written to: {}".format(trace_file)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/run_tuning.sh b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_tuning.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/run_tuning.sh rename to examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_tuning.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/squad_evaluate.py b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/squad_evaluate.py new file mode 100644 index 00000000000..c582e68775f --- /dev/null +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/squad_evaluate.py @@ -0,0 +1,108 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Source: https://github.com/allenai/bi-att-flow/blob/master/squad/evaluate-v1.1.py + +""" Official evaluation script for v1.1 of the SQuAD dataset. 
""" +from __future__ import print_function +from collections import Counter +import string +import re +import argparse +import json +import sys + + +def normalize_answer(s): + """Lower text and remove punctuation, articles and extra whitespace.""" + def remove_articles(text): + return re.sub(r'\b(a|an|the)\b', ' ', text) + + def white_space_fix(text): + return ' '.join(text.split()) + + def remove_punc(text): + exclude = set(string.punctuation) + return ''.join(ch for ch in text if ch not in exclude) + + def lower(text): + return text.lower() + + return white_space_fix(remove_articles(remove_punc(lower(s)))) + + +def f1_score(prediction, ground_truth): + prediction_tokens = normalize_answer(prediction).split() + ground_truth_tokens = normalize_answer(ground_truth).split() + common = Counter(prediction_tokens) & Counter(ground_truth_tokens) + num_same = sum(common.values()) + if num_same == 0: + return 0 + precision = 1.0 * num_same / len(prediction_tokens) + recall = 1.0 * num_same / len(ground_truth_tokens) + f1 = (2 * precision * recall) / (precision + recall) + return f1 + + +def exact_match_score(prediction, ground_truth): + return (normalize_answer(prediction) == normalize_answer(ground_truth)) + + +def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): + scores_for_ground_truths = [] + for ground_truth in ground_truths: + score = metric_fn(prediction, ground_truth) + scores_for_ground_truths.append(score) + return max(scores_for_ground_truths) + + +def evaluate(dataset, predictions): + f1 = exact_match = total = 0 + for article in dataset: + for paragraph in article['paragraphs']: + for qa in paragraph['qas']: + total += 1 + if qa['id'] not in predictions: + message = 'Unanswered question ' + qa['id'] + \ + ' will receive score 0.' + print(message, file=sys.stderr) + continue + ground_truths = list(map(lambda x: x['text'], qa['answers'])) + prediction = predictions[qa['id']] + exact_match += metric_max_over_ground_truths( + exact_match_score, prediction, ground_truths) + f1 += metric_max_over_ground_truths( + f1_score, prediction, ground_truths) + + exact_match = 100.0 * exact_match / total + f1 = 100.0 * f1 / total + + return {'exact_match': exact_match, 'f1': f1} + + +if __name__ == '__main__': + expected_version = '1.1' + parser = argparse.ArgumentParser( + description='Evaluation for SQuAD ' + expected_version) + parser.add_argument('dataset_file', help='Dataset file') + parser.add_argument('prediction_file', help='Prediction File') + args = parser.parse_args() + with open(args.dataset_file) as dataset_file: + dataset_json = json.load(dataset_file) + if (dataset_json['version'] != expected_version): + print('Evaluation expects v-' + expected_version + + ', but got dataset with v-' + dataset_json['version'], + file=sys.stderr) + dataset = dataset_json['data'] + with open(args.prediction_file) as prediction_file: + predictions = json.load(prediction_file) + print(json.dumps(evaluate(dataset, predictions))) diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/tokenization.py b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/tokenization.py new file mode 100644 index 00000000000..52c92adb81f --- /dev/null +++ b/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/tokenization.py @@ -0,0 +1,399 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tokenization classes.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import re +import unicodedata +import six +import tensorflow as tf + + +def validate_case_matches_checkpoint(do_lower_case, init_checkpoint): + """Checks whether the casing config is consistent with the checkpoint name.""" + + # The casing has to be passed in by the user and there is no explicit check + # as to whether it matches the checkpoint. The casing information probably + # should have been stored in the bert_config.json file, but it's not, so + # we have to heuristically detect it to validate. + + if not init_checkpoint: + return + + m = re.match("^.*?([A-Za-z0-9_-]+)/bert_model.ckpt", init_checkpoint) + if m is None: + return + + model_name = m.group(1) + + lower_models = [ + "uncased_L-24_H-1024_A-16", "uncased_L-12_H-768_A-12", + "multilingual_L-12_H-768_A-12", "chinese_L-12_H-768_A-12" + ] + + cased_models = [ + "cased_L-12_H-768_A-12", "cased_L-24_H-1024_A-16", + "multi_cased_L-12_H-768_A-12" + ] + + is_bad_config = False + if model_name in lower_models and not do_lower_case: + is_bad_config = True + actual_flag = "False" + case_name = "lowercased" + opposite_flag = "True" + + if model_name in cased_models and do_lower_case: + is_bad_config = True + actual_flag = "True" + case_name = "cased" + opposite_flag = "False" + + if is_bad_config: + raise ValueError( + "You passed in `--do_lower_case=%s` with `--init_checkpoint=%s`. " + "However, `%s` seems to be a %s model, so you " + "should pass in `--do_lower_case=%s` so that the fine-tuning matches " + "how the model was pre-training. If this error is wrong, please " + "just comment out this check." % (actual_flag, init_checkpoint, + model_name, case_name, opposite_flag)) + + +def convert_to_unicode(text): + """Converts `text` to Unicode (if it's not already), assuming utf-8 input.""" + if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text.decode("utf-8", "ignore") + elif isinstance(text, unicode): + return text + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + + +def printable_text(text): + """Returns text encoded in a way suitable for print or `tf.logging`.""" + + # These functions want `str` for both Python2 and Python3, but in one case + # it's a Unicode string and in the other it's a byte string. 
+ if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text + elif isinstance(text, unicode): + return text.encode("utf-8") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + + +def load_vocab(vocab_file): + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + index = 0 + with tf.io.gfile.GFile(vocab_file, "r") as reader: + while True: + token = convert_to_unicode(reader.readline()) + if not token: + break + token = token.strip() + vocab[token] = index + index += 1 + return vocab + + +def convert_by_vocab(vocab, items): + """Converts a sequence of [tokens|ids] using the vocab.""" + output = [] + for item in items: + output.append(vocab[item]) + return output + + +def convert_tokens_to_ids(vocab, tokens): + return convert_by_vocab(vocab, tokens) + + +def convert_ids_to_tokens(inv_vocab, ids): + return convert_by_vocab(inv_vocab, ids) + + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a piece of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class FullTokenizer(object): + """Runs end-to-end tokenziation.""" + + def __init__(self, vocab_file, do_lower_case=True): + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case) + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + + return split_tokens + + def convert_tokens_to_ids(self, tokens): + return convert_by_vocab(self.vocab, tokens) + + def convert_ids_to_tokens(self, ids): + return convert_by_vocab(self.inv_vocab, ids) + + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + + def tokenize(self, text): + """Tokenizes a piece of text.""" + text = convert_to_unicode(text) + text = self._clean_text(text) + + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). 
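+ # For example (illustrative): self._tokenize_chinese_chars("ab中c") returns
+ # "ab 中 c", so every CJK character becomes its own whitespace-separated token
+ # before the lower-casing, accent-stripping and punctuation splitting below.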
+ text = self._tokenize_chinese_chars(text) + + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. + if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + + +class WordpieceTokenizer(object): + """Runs WordPiece tokenziation.""" + + def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=200): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + + def tokenize(self, text): + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer. + + Returns: + A list of wordpiece tokens. 
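+ Illustrative walkthrough (assuming the vocabulary contains exactly the
+ pieces shown above): for "unaffable" the longest prefix found in the
+ vocab is "un"; the remainder is retried with the "##" continuation
+ marker, matching "##aff" and then "##able". If no prefix of a remaining
+ chunk is in the vocab, the whole input token is replaced by `unk_token`.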
+ """
+
+ text = convert_to_unicode(text)
+
+ output_tokens = []
+ for token in whitespace_tokenize(text):
+ chars = list(token)
+ if len(chars) > self.max_input_chars_per_word:
+ output_tokens.append(self.unk_token)
+ continue
+
+ is_bad = False
+ start = 0
+ sub_tokens = []
+ while start < len(chars):
+ end = len(chars)
+ cur_substr = None
+ while start < end:
+ substr = "".join(chars[start:end])
+ if start > 0:
+ substr = "##" + substr
+ if substr in self.vocab:
+ cur_substr = substr
+ break
+ end -= 1
+ if cur_substr is None:
+ is_bad = True
+ break
+ sub_tokens.append(cur_substr)
+ start = end
+
+ if is_bad:
+ output_tokens.append(self.unk_token)
+ else:
+ output_tokens.extend(sub_tokens)
+ return output_tokens
+
+
+def _is_whitespace(char):
+ """Checks whether `chars` is a whitespace character."""
+ # \t, \n, and \r are technically control characters but we treat them
+ # as whitespace since they are generally considered as such.
+ if char == " " or char == "\t" or char == "\n" or char == "\r":
+ return True
+ cat = unicodedata.category(char)
+ if cat == "Zs":
+ return True
+ return False
+
+
+def _is_control(char):
+ """Checks whether `chars` is a control character."""
+ # These are technically control characters but we count them as whitespace
+ # characters.
+ if char == "\t" or char == "\n" or char == "\r":
+ return False
+ cat = unicodedata.category(char)
+ if cat in ("Cc", "Cf"):
+ return True
+ return False
+
+
+def _is_punctuation(char):
+ """Checks whether `chars` is a punctuation character."""
+ cp = ord(char)
+ # We treat all non-letter/number ASCII as punctuation.
+ # Characters such as "^", "$", and "`" are not in the Unicode
+ # Punctuation class but we treat them as punctuation anyways, for
+ # consistency.
+ if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
+ (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
+ return True
+ cat = unicodedata.category(char)
+ if cat.startswith("P"):
+ return True
+ return False
diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/README.md b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/README.md
new file mode 100644
index 00000000000..7ff331df3c4
--- /dev/null
+++ b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/README.md
@@ -0,0 +1,79 @@
+Step-by-Step
+============
+
+This example loads a RoBERTa model and confirms its accuracy and speed based on [GLUE data](https://gluebenchmark.com/).
+
+# Prerequisite
+
+## 1. Environment
+
+```shell
+pip install neural-compressor
+pip install -r requirements.txt
+```
+> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+## 2. Prepare Dataset
+
+Download the GLUE data with the `prepare_data.sh` script.
+```shell
+export GLUE_DIR=path/to/glue_data
+export TASK_NAME=MRPC
+
+bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME
+```
+
+## 3. Prepare Model
+
+Please refer to [Bert-GLUE_OnnxRuntime_quantization guide](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/quantization/notebooks/Bert-GLUE_OnnxRuntime_quantization.ipynb) for detailed model export. The following is a simple example.
+ +Use [Huggingface Transformers](https://github.com/huggingface/transformers/tree/v2.2.1) to fine-tune the model based on the [MRPC](https://github.com/huggingface/transformers/tree/master/examples/text-classification#mrpc) example with command like: +```shell +export OUT_DIR=/path/to/out_dir/ +python ./run_glue.py \ + --model_type roberta \ + --model_name_or_path roberta-base \ + --task_name $TASK_NAME \ + --do_train \ + --do_eval \ + --do_lower_case \ + --data_dir $GLUE_DIR/$TASK_NAME \ + --max_seq_length 128 \ + --per_gpu_eval_batch_size=8 \ + --per_gpu_train_batch_size=8 \ + --learning_rate 2e-5 \ + --num_train_epochs 3.0 \ + --save_steps 100000 \ + --output_dir $OUT_DIR +``` +Run the `prepare_model.sh` script + +Usage: +```shell +cd examples/onnxrt/language_translation/mobilebert/ + +bash prepare_model.sh --input_dir=$OUT_DIR \ + --task_name=$TASK_NAME \ + --output_model=path/to/model # model path as *.onnx +``` + +# Run + +## 1. Quantization + +Dynamic quantization: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/model_tune \ # model path as *.onnx + --dataset_location=path/to/glue_data +``` + +## 2. Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/glue_data \ + --batch_size=batch_size \ + --mode=performance # or accuracy +``` \ No newline at end of file diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/export.py b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/export.py new file mode 100644 index 00000000000..b25d436dd3c --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/export.py @@ -0,0 +1,61 @@ +import argparse + +import torch +from transformers import RobertaForSequenceClassification + +def export_onnx_model(args, model, onnx_model_path): + with torch.no_grad(): + inputs = {'input_ids': torch.ones(1,args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1,args.max_len, dtype=torch.int64)} + outputs = model(**inputs) + + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + torch.onnx.export(model, # model being run + (inputs['input_ids'], + inputs['attention_mask']), # model input (or a tuple for + # multiple inputs) + onnx_model_path, # where to save the model (can be a file + # or file-like object) + opset_version=11, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'input_mask'], + output_names=['output'], # the model's output names + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'input_mask' : symbolic_names}) + print("ONNX Model exported to {0}".format(onnx_model_path)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export bert onnx model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--input_dir', + type=str, + help='input_dir of bert model, must contain config.json') + parser.add_argument( + '--task_name', + type=str, + choices=["MRPC", "MNLI"], + help='tasks names of bert model') + parser.add_argument( + '--max_len', + type=int, + default=128, + help='Maximum length of the sentence pairs') + parser.add_argument( + '--do_lower_case', + type=bool, + default=True, + help='whether lower the tokenizer') + parser.add_argument( + '--output_model', + type=str, + default='bert.onnx', + help='path to exported model file') + args = parser.parse_args() + + model = 
RobertaForSequenceClassification.from_pretrained(args.input_dir) + print(model) + # export_onnx_model(args, model, args.output_model) diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/main.py b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/main.py new file mode 100644 index 00000000000..0b24572f09a --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/main.py @@ -0,0 +1,412 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint:disable=redefined-outer-name,logging-format-interpolation + +import logging +import argparse +import os +import onnx +import onnxruntime +import transformers +import torch +import numpy as np +from dataclasses import dataclass +from typing import List, Optional, Union +from neural_compressor.data.dataloaders.onnxrt_dataloader import DefaultDataLoader + +logger = logging.getLogger(__name__) +logging.basicConfig(format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt = "%m/%d/%Y %H:%M:%S", + level = logging.WARN) + +class ONNXRTBertDataset: + """Dataset used for model Bert. + Args: data_dir (str): The input data dir. + model_name_or_path (str): Path to pre-trained student model or shortcut name, + selected in the list: + max_seq_length (int, default=128): The maximum length after tokenization. + Sequences longer than this will be truncated, + sequences shorter will be padded. + do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. + task (str, default=mrpc): The name of the task to fine-tune. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. + model_type (str, default="bert"): model type, support "distilbert", "bert", + "mobilebert", "roberta". + dynamic_length (bool, default=False): Whether to use fixed sequence length. + evaluate (bool, default=True): Whether do evaluation or training. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
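+ Example (illustrative; the model path and data_dir below are placeholders):
+ dataset = ONNXRTBertDataset(model="roberta.onnx",
+ data_dir="/path/to/glue_data/MRPC",
+ model_name_or_path="roberta-base",
+ task="mrpc",
+ model_type="roberta")
+ inputs, label = dataset[0] # tuple of model inputs and its label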
+ """ + def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ + do_lower_case=True, task="mrpc", model_type="bert", dynamic_length=False,\ + evaluate=True, transform=None, filter=None): + self.inputs = [inp.name for inp in onnx.load(model).graph.input] + task = task.lower() + model_type = model_type.lower() + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + assert model_type in ["distilbert", "bert", "mobilebert", "roberta"], "Unsupported \ + model type" + self.dynamic_length = dynamic_length + self.model_type = model_type + self.max_seq_length = max_seq_length + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, + do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ + max_seq_length, task, model_type, tokenizer, evaluate) + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) + return batch[:len(self.inputs)], batch[-1] + +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ + model_type, tokenizer, evaluate): + from torch.utils.data import TensorDataset + + processor = transformers.glue_processors[task]() + output_mode = transformers.glue_output_modes[task] + # Load data features from cache or dataset file + if not os.path.exists("./dataset_cached"): + os.makedirs("./dataset_cached") + cached_features_file = os.path.join("./dataset_cached", "cached_{}_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, model_name_or_path.split("/"))).pop(), + str(max_seq_length), + str(task))) + if os.path.exists(cached_features_file): + logger.info("Load features from cached file {}.".format(cached_features_file)) + features = torch.load(cached_features_file) + else: + logger.info("Create features from dataset file at {}.".format(data_dir)) + label_list = processor.get_labels() + examples = processor.get_dev_examples(data_dir) if evaluate else \ + processor.get_train_examples(data_dir) + features = convert_examples_to_features(examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, + ) + logger.info("Save features into cached file {}.".format(cached_features_file)) + torch.save(features, cached_features_file) + # Convert to Tensors and build dataset + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) + all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) + all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) + if output_mode == "classification": + all_labels = torch.tensor([f.label for f in features], dtype=torch.long) + elif output_mode == "regression": + all_labels = torch.tensor([f.label for f in features], dtype=torch.float) + dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ + all_seq_lengths, all_labels) + return dataset + +def convert_examples_to_features( + examples, + tokenizer, + max_length=128, + task=None, + label_list=None, + output_mode="classification", + pad_token=0, + pad_token_segment_id=0, + mask_padding_with_zero=True, +): + processor = transformers.glue_processors[task]() + if label_list is None: + label_list = processor.get_labels() + logger.info("Use label 
list {} for task {}.".format(label_list, task)) + label_map = {label: i for i, label in enumerate(label_list)} + features = [] + for (ex_index, example) in enumerate(examples): + inputs = tokenizer.encode_plus( + example.text_a, + example.text_b, + add_special_tokens=True, + max_length=max_length, + return_token_type_ids=True, + truncation=True, + ) + input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) + + # Zero-pad up to the sequence length. + seq_length = len(input_ids) + padding_length = max_length - len(input_ids) + + input_ids = input_ids + ([pad_token] * padding_length) + attention_mask = attention_mask + \ + ([0 if mask_padding_with_zero else 1] * padding_length) + token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) + + assert len(input_ids) == max_length, \ + "Error with input_ids length {} vs {}".format( + len(input_ids), max_length) + assert len(attention_mask) == max_length, \ + "Error with attention_mask length {} vs {}".format( + len(attention_mask), max_length + ) + assert len(token_type_ids) == max_length, \ + "Error with token_type_ids length {} vs {}".format( + len(token_type_ids), max_length + ) + if output_mode == "classification": + label = label_map[example.label] + elif output_mode == "regression": + label = float(example.label) + else: + raise KeyError(output_mode) + + feats = InputFeatures( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + label=label, + seq_length=seq_length, + ) + features.append(feats) + return features + +@dataclass(frozen=True) +class InputFeatures: + """ + A single set of features of data. + Property names are the same names as the corresponding inputs to a model. + Args: + input_ids: Indices of input sequence tokens in the vocabulary. + attention_mask: Mask to avoid performing attention on padding token indices. + Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, + ``0`` for MASKED (padded) tokens. + token_type_ids: (Optional) Segment token indices to indicate first and second + portions of the inputs. Only some models use them. + label: (Optional) Label corresponding to the input. Int for classification problems, + float for regression problems. + seq_length: (Optional) The length of input sequence before padding. + """ + + input_ids: List[int] + attention_mask: Optional[List[int]] = None + token_type_ids: Optional[List[int]] = None + label: Optional[Union[int, float]] = None + seq_length: Optional[List[int]] = None + +class ONNXRTGLUE: + """Computes GLUE score. + + Args: + task (str, default=mrpc): The name of the task. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. 
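+ Example (illustrative):
+ metric = ONNXRTGLUE(task="mrpc")
+ metric.update(preds, labels) # logits and labels from one batch
+ accuracy = metric.result() # headline metric, "acc" for mrpc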
+ + """ + def __init__(self, task="mrpc"): + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + self.pred_list = None + self.label_list = None + self.task = task + self.return_key = { + "cola": "mcc", + "mrpc": "acc", + "sts-b": "corr", + "qqp": "acc", + "mnli": "mnli/acc", + "qnli": "acc", + "rte": "acc", + "wnli": "acc", + "sst-2": "acc" + } + + def update(self, preds, labels): + """add preds and labels to storage""" + if isinstance(preds, list) and len(preds) == 1: + preds = preds[0] + if isinstance(labels, list) and len(labels) == 1: + labels = labels[0] + if self.pred_list is None: + self.pred_list = preds + self.label_list = labels + else: + self.pred_list = np.append(self.pred_list, preds, axis=0) + self.label_list = np.append(self.label_list, labels, axis=0) + + def reset(self): + """clear preds and labels storage""" + self.pred_list = None + self.label_list = None + + def result(self): + """calculate metric""" + output_mode = transformers.glue_output_modes[self.task] + + if output_mode == "classification": + processed_preds = np.argmax(self.pred_list, axis=1) + elif output_mode == "regression": + processed_preds = np.squeeze(self.pred_list) + result = transformers.glue_compute_metrics(\ + self.task, processed_preds, self.label_list) + return result[self.return_key[self.task]] + +if __name__ == "__main__": + logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") + parser = argparse.ArgumentParser( + description="BERT fine-tune examples for classification/regression tasks.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--model_path", + type=str, + help="Pre-trained model on onnx file" + ) + parser.add_argument( + "--benchmark", + action="store_true", \ + default=False + ) + parser.add_argument( + "--tune", + action="store_true", \ + default=False, + help="whether quantize the model" + ) + parser.add_argument( + "--output_model", + type=str, + help="output model path" + ) + parser.add_argument( + "--mode", + type=str, + help="benchmark mode of performance or accuracy" + ) + parser.add_argument( + "--model_name_or_path", + type=str, + help="pretrained model name or path" + ) + parser.add_argument( + "--data_path", + type=str, + help="input data path" + ) + parser.add_argument( + "--batch_size", + default=8, + type=int, + ) + parser.add_argument( + "--task", + type=str, + default="mrpc", + choices=["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], + help="GLUE task name" + ) + parser.add_argument( + "--dynamic_length", + type=bool, + default=False, + help="dynamic length" + ) + parser.add_argument( + "--max_seq_length", + type=int, + default=128, + help="max sequence length" + ) + parser.add_argument( + "--model_type", + type=str, + default="bert", + choices=["distilbert", "bert", "mobilebert", "roberta"], + help="model type" + ) + args = parser.parse_args() + + dataset = ONNXRTBertDataset(args.model_path, + data_dir=args.data_path, + model_name_or_path=args.model_name_or_path, + max_seq_length=args.max_seq_length, + task=args.task, + model_type=args.model_type, + dynamic_length=args.dynamic_length) + dataloader = DefaultDataLoader(dataset, args.batch_size) + metric = ONNXRTGLUE(args.task) + + def eval_func(model): + metric.reset() + session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = 
[session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + inputs = inputs[:len_inputs] + for i in range(len_inputs): + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + metric.update(predictions[0], labels) + return metric.result() + + if args.benchmark: + model = onnx.load(args.model_path) + if args.mode == "performance": + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == "accuracy": + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + if onnxruntime.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions("bert") + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + args.model_path, + "bert", + num_heads=12, + hidden_size=768, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(args.model_path) + + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach="dynamic") + q_model = quantization.fit(model, + config, + eval_func=eval_func) + q_model.save(args.output_model) + diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_data.sh b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_data.sh new file mode 100644 index 00000000000..8e434a5c521 --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + download_data + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --data_dir=*) + data_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function download_data { + wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py + python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_model.sh b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_model.sh new file mode 100644 index 00000000000..b2afbddae7f --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_model.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + export_model + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_dir=*) + input_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + --max_len=*) + max_len=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function export_model { + # curl https://download.pytorch.org/tutorial/MRPC.zip --output MPRC.zip + # unzip -n MPRC.zip + python export.py --input_dir ${input_dir} --task_name ${task_name} --output_model ${output_model} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/requirements.txt 
b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/requirements.txt new file mode 100644 index 00000000000..1fb753da72e --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/requirements.txt @@ -0,0 +1,7 @@ +torch +transformers +onnx +onnxruntime +coloredlogs +sympy +onnxruntime-extensions; python_version < '3.10' diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_benchmark.sh b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_benchmark.sh new file mode 100644 index 00000000000..f0309360e14 --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_benchmark.sh @@ -0,0 +1,62 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [[ ${mode} == "accuracy" ]]; then + dynamic_length=False + elif [[ ${mode} == "performance" ]]; then + dynamic_length=True + else + echo "Error: No such mode: ${mode}" + exit 1 + fi + + model_name_or_path="roberta-base" + task_name="mrpc" + model_type="roberta" + + python main.py \ + --model_path ${input_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --model_type ${model_type} \ + --mode ${mode} \ + --dynamic_length ${dynamic_length} \ + --benchmark + +} + +main "$@" + diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_tuning.sh b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_tuning.sh new file mode 100644 index 00000000000..1d402dfb402 --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_tuning.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + model_name_or_path="roberta-base" + batch_size=8 + task_name="mrpc" + model_type="roberta" + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --model_type ${model_type} \ + --tune +} + +main "$@" + + + diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/README.md b/examples/onnxrt/nlp/roberta/quantization/ptq_static/README.md new file mode 100644 index 00000000000..f58b3f19c27 --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_static/README.md @@ -0,0 +1,82 @@ +Step-by-Step +============ + +This example load a RoBERTa model and confirm its accuracy and speed based on [GLUE data](https://gluebenchmark.com/). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. 
Prepare Dataset + +download the GLUE data with `prepare_data.sh` script. +```shell +export GLUE_DIR=path/to/glue_data +export TASK_NAME=MRPC + +bash prepare_data.sh --data_dir=$GLUE_DIR --task_name=$TASK_NAME +``` + +## 3. Prepare Model + +Please refer to [Bert-GLUE_OnnxRuntime_quantization guide](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/quantization/notebooks/Bert-GLUE_OnnxRuntime_quantization.ipynb) for detailed model export. The following is a simple example. + +Use [Huggingface Transformers](https://github.com/huggingface/transformers/tree/v2.2.1) to fine-tune the model based on the [MRPC](https://github.com/huggingface/transformers/tree/master/examples/text-classification#mrpc) example with command like: +```shell +export OUT_DIR=/path/to/out_dir/ +python ./run_glue.py \ + --model_type roberta \ + --model_name_or_path roberta-base \ + --task_name $TASK_NAME \ + --do_train \ + --do_eval \ + --do_lower_case \ + --data_dir $GLUE_DIR/$TASK_NAME \ + --max_seq_length 128 \ + --per_gpu_eval_batch_size=8 \ + --per_gpu_train_batch_size=8 \ + --learning_rate 2e-5 \ + --num_train_epochs 3.0 \ + --save_steps 100000 \ + --output_dir $OUT_DIR +``` +Run the `prepare_model.sh` script + +Usage: +```shell +cd examples/onnxrt/language_translation/mobilebert/ + +bash prepare_model.sh --input_dir=$OUT_DIR \ + --task_name=$TASK_NAME \ + --output_model=path/to/model # model path as *.onnx +``` + +# Run + +## 1. Quantization + +Static quantization with QDQ format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/model_tune \ # model path as *.onnx + --dataset_location=path/to/glue_data \ + --quant_format="QDQ" +``` + +## 2. Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/glue_data \ + --batch_size=batch_size \ + --mode=performance # or accuracy +``` + + diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/export.py b/examples/onnxrt/nlp/roberta/quantization/ptq_static/export.py new file mode 100644 index 00000000000..b25d436dd3c --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_static/export.py @@ -0,0 +1,61 @@ +import argparse + +import torch +from transformers import RobertaForSequenceClassification + +def export_onnx_model(args, model, onnx_model_path): + with torch.no_grad(): + inputs = {'input_ids': torch.ones(1,args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1,args.max_len, dtype=torch.int64)} + outputs = model(**inputs) + + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + torch.onnx.export(model, # model being run + (inputs['input_ids'], + inputs['attention_mask']), # model input (or a tuple for + # multiple inputs) + onnx_model_path, # where to save the model (can be a file + # or file-like object) + opset_version=11, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'input_mask'], + output_names=['output'], # the model's output names + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'input_mask' : symbolic_names}) + print("ONNX Model exported to {0}".format(onnx_model_path)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export bert onnx model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--input_dir', + type=str, + help='input_dir of bert model, must contain config.json') + 
parser.add_argument( + '--task_name', + type=str, + choices=["MRPC", "MNLI"], + help='tasks names of bert model') + parser.add_argument( + '--max_len', + type=int, + default=128, + help='Maximum length of the sentence pairs') + parser.add_argument( + '--do_lower_case', + type=bool, + default=True, + help='whether lower the tokenizer') + parser.add_argument( + '--output_model', + type=str, + default='bert.onnx', + help='path to exported model file') + args = parser.parse_args() + + model = RobertaForSequenceClassification.from_pretrained(args.input_dir) + print(model) + # export_onnx_model(args, model, args.output_model) diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/main.py b/examples/onnxrt/nlp/roberta/quantization/ptq_static/main.py new file mode 100644 index 00000000000..1e3435af7dc --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_static/main.py @@ -0,0 +1,422 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint:disable=redefined-outer-name,logging-format-interpolation + +import logging +import argparse +import os +import onnx +import onnxruntime +import transformers +import torch +import numpy as np +from dataclasses import dataclass +from typing import List, Optional, Union +from neural_compressor.data.dataloaders.onnxrt_dataloader import DefaultDataLoader + +logger = logging.getLogger(__name__) +logging.basicConfig(format = "%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt = "%m/%d/%Y %H:%M:%S", + level = logging.WARN) + +class ONNXRTBertDataset: + """Dataset used for model Bert. + Args: data_dir (str): The input data dir. + model_name_or_path (str): Path to pre-trained student model or shortcut name, + selected in the list: + max_seq_length (int, default=128): The maximum length after tokenization. + Sequences longer than this will be truncated, + sequences shorter will be padded. + do_lower_case (bool, default=True): Whether to lowercase the input when tokenizing. + task (str, default=mrpc): The name of the task to fine-tune. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. + model_type (str, default="bert"): model type, support "distilbert", "bert", + "mobilebert", "roberta". + dynamic_length (bool, default=False): Whether to use fixed sequence length. + evaluate (bool, default=True): Whether do evaluation or training. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
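+ Example (illustrative; paths are placeholders):
+ dataset = ONNXRTBertDataset("roberta.onnx", data_dir="/path/to/glue_data/MRPC",
+ model_name_or_path="roberta-base", task="mrpc",
+ model_type="roberta")
+ len(dataset) # number of cached evaluation examples
+ dataset[0] # (model inputs tuple, label)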
+ """ + def __init__(self, model, data_dir, model_name_or_path, max_seq_length=128,\ + do_lower_case=True, task="mrpc", model_type="bert", dynamic_length=False,\ + evaluate=True, transform=None, filter=None): + self.inputs = [inp.name for inp in onnx.load(model).graph.input] + task = task.lower() + model_type = model_type.lower() + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + assert model_type in ["distilbert", "bert", "mobilebert", "roberta"], "Unsupported \ + model type" + self.dynamic_length = dynamic_length + self.model_type = model_type + self.max_seq_length = max_seq_length + tokenizer = transformers.AutoTokenizer.from_pretrained(model_name_or_path, + do_lower_case=do_lower_case) + self.dataset = load_and_cache_examples(data_dir, model_name_or_path, \ + max_seq_length, task, model_type, tokenizer, evaluate) + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + batch = tuple(t.detach().cpu().numpy() if not isinstance(t, np.ndarray) else t for t in self.dataset[index]) + return batch[:len(self.inputs)], batch[-1] + +def load_and_cache_examples(data_dir, model_name_or_path, max_seq_length, task, \ + model_type, tokenizer, evaluate): + from torch.utils.data import TensorDataset + + processor = transformers.glue_processors[task]() + output_mode = transformers.glue_output_modes[task] + # Load data features from cache or dataset file + if not os.path.exists("./dataset_cached"): + os.makedirs("./dataset_cached") + cached_features_file = os.path.join("./dataset_cached", "cached_{}_{}_{}_{}".format( + "dev" if evaluate else "train", + list(filter(None, model_name_or_path.split("/"))).pop(), + str(max_seq_length), + str(task))) + if os.path.exists(cached_features_file): + logger.info("Load features from cached file {}.".format(cached_features_file)) + features = torch.load(cached_features_file) + else: + logger.info("Create features from dataset file at {}.".format(data_dir)) + label_list = processor.get_labels() + examples = processor.get_dev_examples(data_dir) if evaluate else \ + processor.get_train_examples(data_dir) + features = convert_examples_to_features(examples, + tokenizer, + task=task, + label_list=label_list, + max_length=max_seq_length, + output_mode=output_mode, + ) + logger.info("Save features into cached file {}.".format(cached_features_file)) + torch.save(features, cached_features_file) + # Convert to Tensors and build dataset + all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long) + all_attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long) + all_token_type_ids = torch.tensor([f.token_type_ids for f in features], dtype=torch.long) + all_seq_lengths = torch.tensor([f.seq_length for f in features], dtype=torch.long) + if output_mode == "classification": + all_labels = torch.tensor([f.label for f in features], dtype=torch.long) + elif output_mode == "regression": + all_labels = torch.tensor([f.label for f in features], dtype=torch.float) + dataset = TensorDataset(all_input_ids, all_attention_mask, all_token_type_ids, \ + all_seq_lengths, all_labels) + return dataset + +def convert_examples_to_features( + examples, + tokenizer, + max_length=128, + task=None, + label_list=None, + output_mode="classification", + pad_token=0, + pad_token_segment_id=0, + mask_padding_with_zero=True, +): + processor = transformers.glue_processors[task]() + if label_list is None: + label_list = processor.get_labels() + logger.info("Use label 
list {} for task {}.".format(label_list, task)) + label_map = {label: i for i, label in enumerate(label_list)} + features = [] + for (ex_index, example) in enumerate(examples): + inputs = tokenizer.encode_plus( + example.text_a, + example.text_b, + add_special_tokens=True, + max_length=max_length, + return_token_type_ids=True, + truncation=True, + ) + input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] + # The mask has 1 for real tokens and 0 for padding tokens. Only real + # tokens are attended to. + attention_mask = [1 if mask_padding_with_zero else 0] * len(input_ids) + + # Zero-pad up to the sequence length. + seq_length = len(input_ids) + padding_length = max_length - len(input_ids) + + input_ids = input_ids + ([pad_token] * padding_length) + attention_mask = attention_mask + \ + ([0 if mask_padding_with_zero else 1] * padding_length) + token_type_ids = token_type_ids + ([pad_token_segment_id] * padding_length) + + assert len(input_ids) == max_length, \ + "Error with input_ids length {} vs {}".format( + len(input_ids), max_length) + assert len(attention_mask) == max_length, \ + "Error with attention_mask length {} vs {}".format( + len(attention_mask), max_length + ) + assert len(token_type_ids) == max_length, \ + "Error with token_type_ids length {} vs {}".format( + len(token_type_ids), max_length + ) + if output_mode == "classification": + label = label_map[example.label] + elif output_mode == "regression": + label = float(example.label) + else: + raise KeyError(output_mode) + + feats = InputFeatures( + input_ids=input_ids, + attention_mask=attention_mask, + token_type_ids=token_type_ids, + label=label, + seq_length=seq_length, + ) + features.append(feats) + return features + +@dataclass(frozen=True) +class InputFeatures: + """ + A single set of features of data. + Property names are the same names as the corresponding inputs to a model. + Args: + input_ids: Indices of input sequence tokens in the vocabulary. + attention_mask: Mask to avoid performing attention on padding token indices. + Mask values selected in ``[0, 1]``: Usually ``1`` for tokens that are NOT MASKED, + ``0`` for MASKED (padded) tokens. + token_type_ids: (Optional) Segment token indices to indicate first and second + portions of the inputs. Only some models use them. + label: (Optional) Label corresponding to the input. Int for classification problems, + float for regression problems. + seq_length: (Optional) The length of input sequence before padding. + """ + + input_ids: List[int] + attention_mask: Optional[List[int]] = None + token_type_ids: Optional[List[int]] = None + label: Optional[Union[int, float]] = None + seq_length: Optional[List[int]] = None + +class ONNXRTGLUE: + """Computes GLUE score. + + Args: + task (str, default=mrpc): The name of the task. + Choices include mrpc, qqp, qnli, rte, + sts-b, cola, mnli, wnli. 
+ + """ + def __init__(self, task="mrpc"): + assert task in ["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], "Unsupported task type" + self.pred_list = None + self.label_list = None + self.task = task + self.return_key = { + "cola": "mcc", + "mrpc": "acc", + "sts-b": "corr", + "qqp": "acc", + "mnli": "mnli/acc", + "qnli": "acc", + "rte": "acc", + "wnli": "acc", + "sst-2": "acc" + } + + def update(self, preds, labels): + """add preds and labels to storage""" + if isinstance(preds, list) and len(preds) == 1: + preds = preds[0] + if isinstance(labels, list) and len(labels) == 1: + labels = labels[0] + if self.pred_list is None: + self.pred_list = preds + self.label_list = labels + else: + self.pred_list = np.append(self.pred_list, preds, axis=0) + self.label_list = np.append(self.label_list, labels, axis=0) + + def reset(self): + """clear preds and labels storage""" + self.pred_list = None + self.label_list = None + + def result(self): + """calculate metric""" + output_mode = transformers.glue_output_modes[self.task] + + if output_mode == "classification": + processed_preds = np.argmax(self.pred_list, axis=1) + elif output_mode == "regression": + processed_preds = np.squeeze(self.pred_list) + result = transformers.glue_compute_metrics(\ + self.task, processed_preds, self.label_list) + return result[self.return_key[self.task]] + +if __name__ == "__main__": + logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") + parser = argparse.ArgumentParser( + description="BERT fine-tune examples for classification/regression tasks.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + "--model_path", + type=str, + help="Pre-trained model on onnx file" + ) + parser.add_argument( + "--benchmark", + action="store_true", \ + default=False + ) + parser.add_argument( + "--tune", + action="store_true", \ + default=False, + help="whether quantize the model" + ) + parser.add_argument( + "--output_model", + type=str, + help="output model path" + ) + parser.add_argument( + "--mode", + type=str, + help="benchmark mode of performance or accuracy" + ) + parser.add_argument( + "--model_name_or_path", + type=str, + help="pretrained model name or path" + ) + parser.add_argument( + "--data_path", + type=str, + help="input data path" + ) + parser.add_argument( + "--batch_size", + default=8, + type=int, + ) + parser.add_argument( + "--task", + type=str, + default="mrpc", + choices=["mrpc", "qqp", "qnli", "rte", "sts-b", "cola", \ + "mnli", "wnli", "sst-2"], + help="GLUE task name" + ) + parser.add_argument( + "--quant_format", + type=str, + default="QOperator", + choices=["QDQ", "QOperator"], + help="quantization format" + ) + parser.add_argument( + "--dynamic_length", + type=bool, + default=False, + help="dynamic length" + ) + parser.add_argument( + "--max_seq_length", + type=int, + default=128, + help="max sequence length" + ) + parser.add_argument( + "--model_type", + type=str, + default="bert", + choices=["distilbert", "bert", "mobilebert", "roberta"], + help="model type" + ) + args = parser.parse_args() + + dataset = ONNXRTBertDataset(args.model_path, + data_dir=args.data_path, + model_name_or_path=args.model_name_or_path, + max_seq_length=args.max_seq_length, + task=args.task, + model_type=args.model_type, + dynamic_length=args.dynamic_length) + dataloader = DefaultDataLoader(dataset, args.batch_size) + metric = ONNXRTGLUE(args.task) + + def eval_func(model): + metric.reset() + session = 
onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + inputs = inputs[:len_inputs] + for i in range(len_inputs): + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + metric.update(predictions[0], labels) + return metric.result() + + if args.benchmark: + model = onnx.load(args.model_path) + if args.mode == "performance": + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == "accuracy": + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + if onnxruntime.__version__ <= '1.13.1': + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions("bert") + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + args.model_path, + "bert", + num_heads=12, + hidden_size=768, + optimization_options=opt_options) + model = model_optimizer.model + else: + model = onnx.load(args.model_path) + + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach="static", + quant_format=args.quant_format, + recipes={"optypes_to_exclude_output_quant": ["MatMul", "Gemm", "Attention", "FusedGemm"]}) + q_model = quantization.fit(model, + config, + eval_func=eval_func, + calib_dataloader=dataloader) + q_model.save(args.output_model) + diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_data.sh b/examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_data.sh new file mode 100644 index 00000000000..8e434a5c521 --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_data.sh @@ -0,0 +1,34 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + download_data + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --data_dir=*) + data_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function download_data { + wget https://raw.githubusercontent.com/huggingface/transformers/f98ef14d161d7bcdc9808b5ec399981481411cc1/utils/download_glue_data.py + python download_glue_data.py --data_dir=${data_dir} --tasks=${task_name} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_model.sh b/examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_model.sh new file mode 100644 index 00000000000..b2afbddae7f --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_model.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + export_model + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_dir=*) + input_dir=$(echo $var |cut -f2 -d=) + ;; + --task_name=*) + task_name=$(echo $var |cut -f2 -d=) + ;; + --max_len=*) + max_len=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function 
export_model { + # curl https://download.pytorch.org/tutorial/MRPC.zip --output MPRC.zip + # unzip -n MPRC.zip + python export.py --input_dir ${input_dir} --task_name ${task_name} --output_model ${output_model} +} + +main "$@" + diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/roberta/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..1fb753da72e --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_static/requirements.txt @@ -0,0 +1,7 @@ +torch +transformers +onnx +onnxruntime +coloredlogs +sympy +onnxruntime-extensions; python_version < '3.10' diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/nlp/roberta/quantization/ptq_static/run_benchmark.sh new file mode 100644 index 00000000000..f0309360e14 --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,62 @@ +#!/bin/bash +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [[ ${mode} == "accuracy" ]]; then + dynamic_length=False + elif [[ ${mode} == "performance" ]]; then + dynamic_length=True + else + echo "Error: No such mode: ${mode}" + exit 1 + fi + + model_name_or_path="roberta-base" + task_name="mrpc" + model_type="roberta" + + python main.py \ + --model_path ${input_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --model_type ${model_type} \ + --mode ${mode} \ + --dynamic_length ${dynamic_length} \ + --benchmark + +} + +main "$@" + diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/nlp/roberta/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..bd9bfdbaa8b --- /dev/null +++ b/examples/onnxrt/nlp/roberta/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,53 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + model_name_or_path="roberta-base" + batch_size=8 + task_name="mrpc" + model_type="roberta" + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --model_name_or_path ${model_name_or_path} \ + --data_path ${dataset_location} \ + --task ${task_name} \ + --batch_size ${batch_size} \ + --quant_format ${quant_format} \ + --model_type ${model_type} \ + --tune +} + +main "$@" + + + diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/README.md b/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/README.md similarity index 65% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/README.md rename to 
examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/README.md index 6ecc8276ac8..4c469a721e4 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/README.md +++ b/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/README.md @@ -21,24 +21,35 @@ wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentatio ``` ## 3. Prepare Dataset -Download SQuAD dataset from [SQuAD dataset link](https://rajpurkar.github.io/SQuAD-explorer/). +Download dataset [cityscapes dataset](https://www.cityscapes-dataset.com/downloads/). + +Dataset directories: + +```bash +cityscapes +├── gtFine +| └── val +├── leftImg8bit +| └── val +``` # Run ## 1. Quantization -Quantize model with QLinearOps: +Static quantization with QOperator format: ```bash bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=/path/to/leftImg8bit/val \ - --output_model=path/to/save + --output_model=path/to/save \ # model path as *.onnx + --dataset_location=/path/to/cityscapes/leftImg8bit/val \ + --quant_format="QOperator" ``` ## 2. Benchmark ```bash bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx - --dataset_location=/path/to/leftImg8bit/val \ + --dataset_location=/path/to/cityscapes/leftImg8bit/val \ --mode=performance ``` diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/cityscapes_labels.py b/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/cityscapes_labels.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/cityscapes_labels.py rename to examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/cityscapes_labels.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/main.py b/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/main.py similarity index 94% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/main.py rename to examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/main.py index 3b85e5794d5..fc7acabea2b 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/main.py +++ b/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/main.py @@ -73,6 +73,18 @@ type=str, help="benchmark mode of performance or accuracy" ) +parser.add_argument( + '--quant_format', + type=str, + choices=['QOperator', 'QDQ'], + help="quantization format" +) +parser.add_argument( + '--batch_size', + type=int, + default=1, + help="quantization format" +) args = parser.parse_args() crop_sz = (800, 800) cell_width = 2 @@ -206,15 +218,15 @@ def result(self): if __name__ == "__main__": model = onnx.load(args.model_path) - batch_size = 1 args.data_path = args.data_path.replace('\\', '/') label_path = os.path.join(args.data_path.split('/leftImg8bit/val')[0], 'gtFine', 'val') - dataloader = Dataloader(args.data_path, label_path, batch_size=batch_size) + dataloader = Dataloader(args.data_path, label_path, batch_size=args.batch_size) metric = IoU() def eval_func(model): metric.reset() - session = ort.InferenceSession(model.SerializeToString(), None) + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) ort_inputs = {} len_inputs = len(session.get_inputs()) inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] @@ -249,14 +261,16 @@ def eval_func(model): fit(model, 
conf, b_dataloader=dataloader) elif args.mode == 'accuracy': acc_result = eval_func(model) - print("Batch size = %d" % batch_size) + print("Batch size = %d" % args.batch_size) print("Accuracy: %.5f" % acc_result) if args.tune: from neural_compressor import quantization, PostTrainingQuantConfig from neural_compressor.config import AccuracyCriterion - accuracy_criterion = AccuracyCriterion(higher_is_better=False, criterion='absolute') + accuracy_criterion = AccuracyCriterion() + accuracy_criterion.absolute = 0.01 config = PostTrainingQuantConfig(approach='static', + quant_format=args.quant_format, accuracy_criterion=accuracy_criterion) q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval_func) q_model.save(args.output_model) diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/requirements.txt b/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/requirements.txt rename to examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/run_benchmark.sh b/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_benchmark.sh similarity index 91% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/run_benchmark.sh rename to examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_benchmark.sh index 6ff7e63317d..56ec299e337 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq/run_benchmark.sh +++ b/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_benchmark.sh @@ -29,11 +29,12 @@ function init_params { # run_benchmark function run_benchmark { - + batch_size=1 python main.py \ --model_path ${input_model} \ --mode ${mode} \ --data_path ${dataset_location} \ + --batch_size ${batch_size} \ --benchmark } diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..827cdbc2f4d --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,43 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --data_path ${dataset_location} \ + --quant_format ${quant_format} \ + --tune +} + +main "$@" diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/README.md b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/README.md new file mode 100644 index 00000000000..574eb950c85 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/README.md @@ -0,0 +1,71 @@ +Step-by-Step +============ + +This example load an 
object detection model converted from [ONNX Model Zoo](https://github.com/onnx/models) and confirm its accuracy and speed based on [MS COCO 2017 dataset](https://cocodataset.org/#download). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Model + +Download model from [ONNX Model Zoo](https://github.com/onnx/models) + +```shell +wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/faster-rcnn/model/FasterRCNN-12.onnx +``` + +## 3. Prepare Dataset + +Download [MS COCO 2017 dataset](https://cocodataset.org/#download). + +Dataset directories: + +```bash +coco2017 +├── annotations +| ├── instances_val2017.json +| └── ... +├── test2017 +├── train2017 +└── val2017 +``` + +# Run + +## 1. Quantization + +Static quantization with QOperator format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --quant_format="QOperator" +``` + +Static quantization with QDQ format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --quant_format="QDQ" +``` + +## 2. Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --mode=performance # or accuracy +``` diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..1e88f8abad8 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..2c57fd61302 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. 
+TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. 
+ """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. 
+ Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
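+                    # The stable sort below orders detections in this (category, area
+                    # range, maxDet) cell by descending score; cumulative TP/FP sums then
+                    # trace the precision/recall curve that is summarized below (area
+                    # under the curve or interpolation at p.recThrs, depending on R).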
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/label_map.yaml b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/label_map.yaml new file mode 100644 index 00000000000..a15a9a6b0d5 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/label_map.yaml @@ -0,0 +1,80 @@ +person: 1 +bicycle: 2 +car: 3 +motorcycle: 4 +airplane: 5 +bus: 6 +train: 7 +truck: 8 +boat: 9 +traffic light: 10 +fire hydrant: 11 +stop sign: 12 +parking meter: 13 +bench: 14 +bird: 15 +cat: 16 +dog: 17 +horse: 18 +sheep: 19 +cow: 20 +elephant: 21 +bear: 22 +zebra: 23 +giraffe: 24 +backpack: 25 +umbrella: 26 +handbag: 27 +tie: 28 +suitcase: 29 +frisbee: 30 +skis: 31 +snowboard: 32 +sports ball: 33 +kite: 34 +baseball bat: 35 +baseball glove: 36 +skateboard: 37 +surfboard: 38 +tennis racket: 39 +bottle: 40 +wine glass: 41 +cup: 42 +fork: 43 +knife: 44 +spoon: 45 +bowl: 46 +banana: 47 +apple: 48 +sandwich: 49 +orange: 50 +broccoli: 51 +carrot: 52 +hot dog: 53 +pizza: 54 +donut: 55 +cake: 56 +chair: 57 +couch: 58 +potted plant: 59 +bed: 60 +dining table: 61 +toilet: 62 +tv: 63 +laptop: 64 +mouse: 65 +remote: 66 +keyboard: 67 +cell phone: 68 +microwave: 69 +oven: 70 +toaster: 71 +sink: 72 +refrigerator: 73 +book: 74 +clock: 75 +vase: 76 +scissors: 77 +teddy bear: 78 +hair drier: 79 +toothbrush: 80 \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/main.py b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/main.py new file mode 100644 index 00000000000..54fbffc968e --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/main.py @@ -0,0 +1,398 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
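+#
+# Evaluate and post-training quantize the FasterRCNN-12 ONNX Model Zoo model on the
+# COCO 2017 validation set; see the README in this directory for usage.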
+# pylint:disable=redefined-outer-name,logging-format-interpolation + + +import logging +import argparse + +import onnx +from PIL import Image +import math +import numpy as np +import onnxruntime as ort + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) +logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) +parser.add_argument( + '--data_path', + type=str, + help="Path of COCO dataset, it contains val2017 and annotations subfolder" +) +parser.add_argument( + '--label_path', + type=str, + default='label_map.yaml', + help="Path of label map yaml file" +) +parser.add_argument( + '--model_path', + type=str, + help="Pre-trained model on onnx file" +) +parser.add_argument( + '--benchmark', + action='store_true', \ + default=False +) +parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether quantize the model" +) +parser.add_argument( + '--config', + type=str, + help="config yaml path" +) +parser.add_argument( + '--output_model', + type=str, + help="output model path" +) +parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" +) +parser.add_argument( + '--quant_format', + type=str, + choices=['QOperator', 'QDQ'], + help="quantization format" +) +parser.add_argument( + '--batch_size', + type=int, + default=1, + help="quantization format" +) +args = parser.parse_args() + +class Dataloader: + def __init__(self, root, batch_size=1, img_dir='val2017', \ + anno_dir='annotations/instances_val2017.json'): + import json + import os + import numpy as np + from pycocotools.coco import COCO + from coco_label_map import category_map + self.batch_size = batch_size + self.image_list = [] + img_path = os.path.join(root, img_dir) + anno_path = os.path.join(root, anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bboxes.append([bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]]) + labels.append(category_map[ann['category_id']].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + label = [np.array([bboxes]), np.array([labels]), np.zeros((1,0)), np.array([img_detail['file_name'].encode('utf-8')])] + with Image.open(img_file) as image: + image = image.convert('RGB') + image, label = self.preprocess((image, label)) + self.image_list.append((image, label)) + + def __iter__(self): + for item in self.image_list: + yield item + + def preprocess(self, sample): + image, label = sample + ratio = 800.0 / min(image.size[0], image.size[1]) + image = image.resize((int(ratio * image.size[0]), int(ratio * image.size[1])), Image.BILINEAR) + + # Convert to BGR + image = np.array(image)[:, :, [2, 1, 0]].astype('float32') + + # HWC -> CHW + image = np.transpose(image, [2, 0, 1]) + + # Normalize + mean_vec = 
np.array([102.9801, 115.9465, 122.7717]) + for i in range(image.shape[0]): + image[i, :, :] = image[i, :, :] - mean_vec[i] + + # Pad to be divisible of 32 + padded_h = int(math.ceil(image.shape[1] / 32) * 32) + padded_w = int(math.ceil(image.shape[2] / 32) * 32) + + padded_image = np.zeros((3, padded_h, padded_w), dtype=np.float32) + padded_image[:, :image.shape[1], :image.shape[2]] = image + image = padded_image + bboxes, str_labels,int_labels, image_ids = label + bboxes = ratio * bboxes + return image, (bboxes, str_labels, int_labels, image_ids) + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + import os + import yaml + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
+ """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + +class Post: + def __call__(self, sample): + preds, labels = sample + bboxes, classes, scores = preds + bboxes = np.reshape(bboxes, (1, -1, 4)) + classes = np.reshape(classes, (1, -1)) + scores = np.reshape(scores, (1, -1)) + return (bboxes, classes, scores), labels[0] + +if __name__ == "__main__": + model = onnx.load(args.model_path) + dataloader = Dataloader(args.data_path, batch_size=args.batch_size) + metric = COCOmAPv2(anno_path=args.label_path, output_index_mapping={'boxes':0, 'scores':2, 'classes':1}) + postprocess = Post() + + def eval_func(model): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), 'number of input tensors must align with graph inputs' + if isinstance(inputs, dict): + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + return metric.result() + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == 'accuracy': + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + from neural_compressor import quantization + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig + accuracy_criterion = AccuracyCriterion() + accuracy_criterion.absolute = 0.01 + config = PostTrainingQuantConfig(approach='static', + quant_format=args.quant_format, + accuracy_criterion=accuracy_criterion) + q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval_func) + q_model.save(args.output_model) + diff --git 
a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/requirements.txt b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..d92c94766dc --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/requirements.txt @@ -0,0 +1,4 @@ +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.10' +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_benchmark.sh new file mode 100644 index 00000000000..17c8c45fa58 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [ ! $label_path ]; then + label_path='label_map.yaml' + fi + batch_size=1 + + python main.py \ + --model_path ${input_model} \ + --mode ${mode} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..161010ad4bc --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + if [ ! 
$label_path ]; then
+        label_path='label_map.yaml'
+    fi
+
+    python main.py \
+            --model_path ${input_model} \
+            --output_model ${output_model} \
+            --data_path ${dataset_location} \
+            --label_path ${label_path} \
+            --quant_format ${quant_format} \
+            --tune
+}
+
+main "$@"
\ No newline at end of file
diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/README.md b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/README.md
new file mode 100644
index 00000000000..90c4ec133da
--- /dev/null
+++ b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/README.md
@@ -0,0 +1,71 @@
+Step-by-Step
+============
+
+This example loads an object detection model converted from the [ONNX Model Zoo](https://github.com/onnx/models) and confirms its accuracy and speed on the [MS COCO 2017 dataset](https://cocodataset.org/#download).
+
+# Prerequisite
+
+## 1. Environment
+
+```shell
+pip install neural-compressor
+pip install -r requirements.txt
+```
+> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+## 2. Prepare Model
+
+Download the model from the [ONNX Model Zoo](https://github.com/onnx/models):
+
+```shell
+wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/mask-rcnn/model/MaskRCNN-12.onnx
+```
+
+## 3. Prepare Dataset
+
+Download [MS COCO 2017 dataset](https://cocodataset.org/#download).
+
+Dataset directories:
+
+```bash
+coco2017
+├── annotations
+|   ├── instances_val2017.json
+|   └── ...
+├── test2017
+├── train2017
+└── val2017
+```
+
+# Run
+
+## 1. Quantization
+
+Static quantization with QOperator format:
+
+```bash
+bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx
+                   --output_model=path/to/save \ # model path as *.onnx
+                   --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders
+                   --label_path=label_map.yaml \
+                   --quant_format="QOperator"
+```
+
+Static quantization with QDQ format:
+
+```bash
+bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx
+                   --output_model=path/to/save \ # model path as *.onnx
+                   --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders
+                   --label_path=label_map.yaml \
+                   --quant_format="QDQ"
+```
+
+## 2. Benchmark
+
+```bash
+bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx
+                      --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders
+                      --label_path=label_map.yaml \
+                      --mode=performance # or accuracy
+```
diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_label_map.py
new file mode 100644
index 00000000000..1e88f8abad8
--- /dev/null
+++ b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_label_map.py
@@ -0,0 +1,103 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..2c57fd61302 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. 
+TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. 
+ """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. 
+ Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/label_map.yaml b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/label_map.yaml new file mode 100644 index 00000000000..1fbc9263dc9 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/label_map.yaml @@ -0,0 +1,80 @@ +person: 1 +bicycle: 2 +car: 3 +motorcycle: 4 +airplane: 5 +bus: 6 +train: 7 +truck: 8 +boat: 9 +traffic light: 10 +fire hydrant: 11 +stop sign: 12 +parking meter: 13 +bench: 14 +bird: 15 +cat: 16 +dog: 17 +horse: 18 +sheep: 19 +cow: 20 +elephant: 21 +bear: 22 +zebra: 23 +giraffe: 24 +backpack: 25 +umbrella: 26 +handbag: 27 +tie: 28 +suitcase: 29 +frisbee: 30 +skis: 31 +snowboard: 32 +sports ball: 33 +kite: 34 +baseball bat: 35 +baseball glove: 36 +skateboard: 37 +surfboard: 38 +tennis racket: 39 +bottle: 40 +wine glass: 41 +cup: 42 +fork: 43 +knife: 44 +spoon: 45 +bowl: 46 +banana: 47 +apple: 48 +sandwich: 49 +orange: 50 +broccoli: 51 +carrot: 52 +hot dog: 53 +pizza: 54 +donut: 55 +cake: 56 +chair: 57 +couch: 58 +potted plant: 59 +bed: 60 +dining table: 61 +toilet: 62 +tv: 63 +laptop: 64 +mouse: 65 +remote: 66 +keyboard: 67 +cell phone: 68 +microwave: 69 +oven: 70 +toaster: 71 +sink: 72 +refrigerator: 73 +book: 74 +clock: 75 +vase: 76 +scissors: 77 +teddy bear: 78 +hair drier: 79 +toothbrush: 80 diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/main.py b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/main.py new file mode 100644 index 00000000000..bb3ecf47dc3 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/main.py @@ -0,0 +1,397 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint:disable=redefined-outer-name,logging-format-interpolation + + +import logging +import argparse + +import onnx +from PIL import Image +import math +import numpy as np +import onnxruntime as ort + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) +logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) +parser.add_argument( + '--data_path', + type=str, + help="Path of COCO dataset, it contains val2017 and annotations subfolder" +) +parser.add_argument( + '--label_path', + type=str, + default='label_map.yaml', + help="Path of label map yaml file" +) +parser.add_argument( + '--model_path', + type=str, + help="Pre-trained model on onnx file" +) +parser.add_argument( + '--benchmark', + action='store_true', \ + default=False +) +parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether quantize the model" +) +parser.add_argument( + '--config', + type=str, + help="config yaml path" +) +parser.add_argument( + '--output_model', + type=str, + help="output model path" +) +parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" +) +parser.add_argument( + '--quant_format', + type=str, + choices=['QOperator', 'QDQ'], + help="quantization format" +) +parser.add_argument( + '--batch_size', + type=int, + default=1, + help="quantization format" +) +args = parser.parse_args() + +class Dataloader: + def __init__(self, root, batch_size=1, img_dir='val2017', \ + anno_dir='annotations/instances_val2017.json'): + import json + import os + import numpy as np + from pycocotools.coco import COCO + from coco_label_map import category_map + self.batch_size = batch_size + self.image_list = [] + img_path = os.path.join(root, img_dir) + anno_path = os.path.join(root, anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bboxes.append([bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]]) + labels.append(category_map[ann['category_id']].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + label = [np.array([bboxes]), np.array([labels]), np.zeros((1,0)), np.array([img_detail['file_name'].encode('utf-8')])] + with Image.open(img_file) as image: + image = image.convert('RGB') + image, label = self.preprocess((image, label)) + self.image_list.append((image, label)) + + def __iter__(self): + for item in self.image_list: + yield item + + def preprocess(self, sample): + image, label = sample + ratio = 800.0 / min(image.size[0], image.size[1]) + image = image.resize((int(ratio * image.size[0]), int(ratio * image.size[1])), Image.BILINEAR) + + # Convert to BGR + image = np.array(image)[:, :, [2, 1, 0]].astype('float32') + + # HWC -> CHW + image = np.transpose(image, [2, 0, 1]) + + # Normalize + mean_vec = 
np.array([102.9801, 115.9465, 122.7717]) + for i in range(image.shape[0]): + image[i, :, :] = image[i, :, :] - mean_vec[i] + + # Pad to be divisible of 32 + padded_h = int(math.ceil(image.shape[1] / 32) * 32) + padded_w = int(math.ceil(image.shape[2] / 32) * 32) + + padded_image = np.zeros((3, padded_h, padded_w), dtype=np.float32) + padded_image[:, :image.shape[1], :image.shape[2]] = image + image = padded_image + bboxes, str_labels,int_labels, image_ids = label + bboxes = ratio * bboxes + return image, (bboxes, str_labels, int_labels, image_ids) + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + import os + import yaml + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
+ """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + +class Post: + def __call__(self, sample): + preds, labels = sample + bboxes, classes, scores, _ = preds + bboxes = np.reshape(bboxes, (1, -1, 4)) + classes = np.reshape(classes, (1, -1)) + scores = np.reshape(scores, (1, -1)) + return (bboxes, classes, scores), labels[0] + +if __name__ == "__main__": + model = onnx.load(args.model_path) + dataloader = Dataloader(args.data_path, batch_size=args.batch_size) + metric = COCOmAPv2(anno_path=args.label_path, output_index_mapping={'boxes':0, 'scores':2, 'classes':1}) + postprocess = Post() + + def eval_func(model): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), 'number of input tensors must align with graph inputs' + if isinstance(inputs, dict): + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + return metric.result() + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == 'accuracy': + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + from neural_compressor import quantization + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig + accuracy_criterion = AccuracyCriterion() + accuracy_criterion.absolute = 0.01 + config = PostTrainingQuantConfig(approach='static', + accuracy_criterion=accuracy_criterion, + quant_format=args.quant_format) + q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval_func) + q_model.save(args.output_model) \ No newline at end of file 
diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/requirements.txt b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..d92c94766dc --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/requirements.txt @@ -0,0 +1,4 @@ +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.10' +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_benchmark.sh new file mode 100644 index 00000000000..17c8c45fa58 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [ ! $label_path ]; then + label_path='label_map.yaml' + fi + batch_size=1 + + python main.py \ + --model_path ${input_model} \ + --mode ${mode} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..161010ad4bc --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + if [ ! 
$label_path ]; then + label_path='label_map.yaml' + fi + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --quant_format ${quant_format} \ + --tune +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/README.md b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/README.md new file mode 100644 index 00000000000..1768a0aeee8 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/README.md @@ -0,0 +1,71 @@ +Step-by-Step +============ + +This example load an object detection model converted from [ONNX Model Zoo](https://github.com/onnx/models) and confirm its accuracy and speed based on [MS COCO 2017 dataset](https://cocodataset.org/#download). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Model + +Download model from [ONNX Model Zoo](https://github.com/onnx/models) + +```shell +wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/ssd/model/ssd-12.onnx +``` + +## 3. Prepare Dataset + +Download [MS COCO 2017 dataset](https://cocodataset.org/#download). + +Dataset directories: + +```bash +coco2017 +├── annotations +| ├── instances_val2017.json +| └── ... +├── test2017 +├── train2017 +└── val2017 +``` + +# Run + +## 1. Quantization + +Static quantization with QOperator format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --quant_format="QOperator" +``` + +Static quantization with QDQ format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --quant_format="QDQ" +``` + +## 2. Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --mode=performance # or accuracy +``` diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..1e88f8abad8 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..2c57fd61302 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. 
+TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. 
+ """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. 
+ Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/data_utils.py b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/data_utils.py new file mode 100644 index 00000000000..7e0273b574d --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/data_utils.py @@ -0,0 +1,471 @@ +import numpy as np +import collections +from PIL import Image +import os +import yaml +from pycocotools.coco import COCO +import cv2 + +class SequentialSampler(): + def __init__(self, dataset): + self.whole_dataset = dataset + + def __iter__(self): + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running + return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) + + def __len__(self): + return len(self.whole_dataset) + +class BatchSampler(): + def __init__(self, sampler, batch_size, drop_last=True): + if isinstance(drop_last, bool): + self.drop_last = drop_last + else: + raise ValueError("last_batch only support bool as input") + + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + def __iter__(self): + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) > 0 and not self.drop_last: + yield batch + + def __len__(self): + if self.drop_last: + return len(self.sampler) // self.batch_size + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size + +class IndexFetcher(): + def __init__(self, dataset, collate_fn, drop_last): + self.dataset = dataset + self.collate_fn = collate_fn + self.drop_last = drop_last + + def __call__(self, batched_indices): + data = [self.dataset[idx] for idx in batched_indices] + return self.collate_fn(data) + + +def default_collate(batch): + """Merge data with outer dimension batch size.""" + elem = batch[0] + if isinstance(elem, collections.abc.Mapping): + return {key: default_collate([d[key] for d in batch]) for key in elem} + elif isinstance(elem, collections.abc.Sequence): + batch = zip(*batch) + return [default_collate(samples) for samples in batch] + elif isinstance(elem, np.ndarray): + try: + return np.stack(batch) + except: + return batch + else: + return batch + +class COCORawDataloader(): + def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, + sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, + shuffle=False): + self.dataset = dataset + self.last_batch = last_batch + self.sampler = sampler + self.batch_sampler = batch_sampler + self.num_workers = num_workers + self.pin_memory = pin_memory + self.collate_fn = collate_fn + self.batch_size = batch_size + self.shuffle = shuffle + self.drop_last = False if last_batch == 'rollover' else True + if self.collate_fn == None: + self.collate_fn = default_collate + + def __iter__(self): + """Yield data in iterative order.""" + return self._generate_dataloader( + self.dataset, + 
batch_size=self.batch_size, + last_batch=self.last_batch, + collate_fn=self.collate_fn, + sampler=self.sampler, + batch_sampler=self.batch_sampler, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + shuffle=self.shuffle) + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, + batch_sampler, num_workers, pin_memory, shuffle): + + sampler = self._generate_sampler(dataset) + self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) + self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) + + for batched_indices in self.batch_sampler: + try: + data = self.fetcher(batched_indices) + yield data + except StopIteration: + return + + def _generate_sampler(self, dataset): + if hasattr(dataset, "__getitem__"): + self.dataset_type = 'index' + return SequentialSampler(dataset) + else: + raise ValueError("dataset type only support (index, iter)") + + +class COCORawDataset(): + """Coco raw dataset. + Please arrange data in this way: + /root/img_dir/1.jpg + /root/img_dir/2.jpg + ... + /root/img_dir/n.jpg + /root/anno_dir + Please use Resize transform when batch_size > 1 + Args: root (str): Root directory of dataset. + img_dir (str, default='val2017'): image file directory. + anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + def __init__(self, root, img_dir='val2017', \ + anno_dir='annotations/instances_val2017.json', transform=None, filter=None): + """Initialize the attributes of class.""" + self.batch_size = 1 + self.image_list = [] + self.transform = transform + img_path = os.path.join(root, img_dir) + anno_path = os.path.join(root, anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ + bbox[2]/float(pic_width), bbox[3]/float(pic_height)] + bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) + labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + + with Image.open(img_file) as image: + image = np.array(image.convert('RGB')) + self.image_list.append( + (image, [np.array(bboxes), np.array(labels), np.array([]),\ + np.array(img_detail['file_name'].encode('utf-8'))])) + + def __len__(self): + """Length of the dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Magic method. 
+ x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.image_list[index] + if self.transform is not None: + sample= self.transform(sample) + return sample + +interpolation_map = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, +} + +class ResizeTransform(): + def __init__(self, size, interpolation='bilinear'): + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + if interpolation in interpolation_map.keys(): + self.interpolation = interpolation_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + def __call__(self, sample): + image, label = sample + image = cv2.resize(image, self.size, interpolation=self.interpolation) + if len(image.shape) == 2: + image = np.expand_dims(image, -1) + return (image, label) + +class RescaleTransform(): + """Scale the values of image to [0,1]. + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Scale the values of the image in sample.""" + image, label = sample + if isinstance(image, np.ndarray): + image = image.astype('float32') / 255. + return (image, label) + +class NormalizeTransform(): + def __init__(self, mean=[0.0], std=[1.0]): + self.mean = mean + self.std = std + for item in self.std: + if item < 10**-6: + raise ValueError("Std should be greater than 0") + + def __call__(self, sample): + image, label = sample + assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' + image = (image - self.mean) / self.std + return (image, label) + +class TransposeTransform(): + def __init__(self, perm): + self.perm = perm + + def __call__(self, sample): + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + image = np.transpose(image, axes=self.perm) + return (image, label) + +np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, + 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, + 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, + 'float16': np.float16, 'float64': np.float64, 'bool': bool, + 'string': str, 'complex128': np.complex128, 'int16': np.int16} + +class CastTransform(): + def __init__(self, dtype='float32'): + assert dtype in np_dtype_map.keys(), 'Unknown dtype' + self.dtype = dtype + + def __call__(self, sample): + image, label = sample + image = image.astype(np_dtype_map[self.dtype]) + return (image, label) + +class ComposeTransform(): + def __init__(self, transform_list): + self.transform_list = transform_list + + def __call__(self, sample): + for transform in self.transform_list: + sample = transform(sample) + return sample + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 
101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. + """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) 
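For clarity, the `COCOmAPv2` metric defined here follows a reset/update/result protocol: reset it before an evaluation pass, feed each batch of post-processed predictions together with the `(bboxes, str_labels, int_labels, image_ids)` label tuple via `update()`, and read the final mAP from `result()`. The loop below is a rough sketch of that flow; `dataloader`, `postprocess`, and `run_model` are stand-ins for the objects built in `main.py`.

```python
# Sketch of the evaluation loop that drives COCOmAPv2 (stand-in names, see note above).
metric = COCOmAPv2(anno_path="label_map.yaml",
                   output_index_mapping={'boxes': 0, 'scores': 2, 'classes': 1})
metric.reset()
for inputs, labels in dataloader:            # labels: (bboxes, str_labels, int_labels, image_ids)
    raw_outputs = run_model(inputs)          # e.g. an ONNX Runtime session.run(...) call
    preds, labels = postprocess((raw_outputs, labels))
    metric.update(preds, labels)             # accumulates COCO-format ground truth and detections
print("mAP:", metric.result())               # pycocotools evaluation runs once here
```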
+ + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. + """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + + +class Post: + def __call__(self, sample): + preds, labels = sample + preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] + return preds, labels + +class LabelBalanceCOCORawFilter(object): + """The label balance filter for COCO raw data.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. + """ + return len(label) == self.size \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/label_map.yaml b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/label_map.yaml new file mode 100644 index 00000000000..1fbc9263dc9 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/label_map.yaml @@ -0,0 +1,80 @@ +person: 1 +bicycle: 2 +car: 3 +motorcycle: 4 +airplane: 5 +bus: 6 +train: 7 +truck: 8 +boat: 9 +traffic light: 10 +fire hydrant: 11 +stop sign: 12 +parking meter: 13 +bench: 14 +bird: 15 +cat: 16 +dog: 17 +horse: 18 +sheep: 19 +cow: 20 +elephant: 21 +bear: 22 +zebra: 23 +giraffe: 24 +backpack: 25 +umbrella: 26 +handbag: 27 +tie: 28 +suitcase: 29 +frisbee: 30 +skis: 31 +snowboard: 32 +sports ball: 33 +kite: 34 +baseball bat: 35 +baseball glove: 36 +skateboard: 37 +surfboard: 38 +tennis racket: 39 +bottle: 40 +wine glass: 41 +cup: 42 +fork: 43 +knife: 44 +spoon: 45 +bowl: 46 +banana: 47 +apple: 48 +sandwich: 49 +orange: 50 +broccoli: 51 +carrot: 52 +hot dog: 53 +pizza: 54 +donut: 55 +cake: 56 +chair: 57 +couch: 58 +potted plant: 59 +bed: 60 +dining table: 61 +toilet: 62 +tv: 63 +laptop: 64 +mouse: 65 +remote: 66 +keyboard: 67 +cell phone: 68 +microwave: 69 +oven: 70 +toaster: 71 +sink: 72 +refrigerator: 73 +book: 74 +clock: 75 +vase: 76 +scissors: 77 +teddy bear: 78 +hair drier: 79 +toothbrush: 80 diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/main.py b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/main.py new file mode 100644 index 00000000000..0a5d11b8aa1 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/main.py @@ -0,0 +1,161 @@ +# Licensed to the 
Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint:disable=redefined-outer-name,logging-format-interpolation + + +import logging +import argparse + +import onnx +import yaml +import onnxruntime as ort +import yaml +import numpy as np + +from data_utils import COCORawDataloader, COCORawDataset, COCOmAPv2, Post +from data_utils import ComposeTransform, ResizeTransform, RescaleTransform, NormalizeTransform, TransposeTransform, CastTransform + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) +logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) +parser.add_argument( + '--model_path', + type=str, + help="Pre-trained model on onnx file" +) +parser.add_argument( + '--data_path', + type=str, + help="path to dataset" +) +parser.add_argument( + '--label_path', + type=str, + default='label_map.yaml', + help="Path of label map yaml file" +) +parser.add_argument( + '--benchmark', + action='store_true', \ + default=False +) +parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether quantize the model" +) +parser.add_argument( + '--config', + type=str, + help="config yaml path" +) +parser.add_argument( + '--output_model', + type=str, + help="output model path" +) +parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" +) +parser.add_argument( + '--quant_format', + type=str, + choices=['QOperator', 'QDQ'], + help="quantization format" +) +parser.add_argument( + '--batch_size', + type=int, + default=1, + help="quantization format" +) +args = parser.parse_args() + + + +if __name__ == "__main__": + model = onnx.load(args.model_path) + transform = ComposeTransform([ResizeTransform(size=1200), + RescaleTransform(), + NormalizeTransform(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + TransposeTransform(perm=[2, 0, 1]), + CastTransform(dtype='float32')]) + dataset = COCORawDataset(args.data_path, transform=transform) + dataloader = COCORawDataloader(dataset, batch_size=args.batch_size) + metric = COCOmAPv2(anno_path=args.label_path, output_index_mapping={'boxes':0, 'scores':2, 'classes':1}) + postprocess = Post() + + def eval_func(model): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if 
isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), 'number of input tensors must align with graph inputs' + if isinstance(inputs, dict): + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + return metric.result() + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == 'accuracy': + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + from neural_compressor import quantization + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig + accuracy_criterion = AccuracyCriterion() + accuracy_criterion.absolute = 0.01 + config = PostTrainingQuantConfig(approach='static', + accuracy_criterion=accuracy_criterion, + quant_format=args.quant_format) + q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval_func) + q_model.save(args.output_model) \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/requirements.txt b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..f62a897bffa --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/requirements.txt @@ -0,0 +1,6 @@ +onnx==1.9.0; python_version < '3.10' +onnx==1.12.0; python_version == '3.10' +onnxruntime==1.10.0; python_version < '3.10' +onnxruntime==1.12.0; python_version == '3.10' +onnxruntime-extensions; python_version < '3.10' +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_benchmark.sh new file mode 100644 index 00000000000..17c8c45fa58 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [ ! 
$label_path ]; then + label_path='label_map.yaml' + fi + batch_size=1 + + python main.py \ + --model_path ${input_model} \ + --mode ${mode} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..161010ad4bc --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + if [ ! $label_path ]; then + label_path='label_map.yaml' + fi + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --quant_format ${quant_format} \ + --tune +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/README.md b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/README.md new file mode 100644 index 00000000000..cd034f23a8b --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/README.md @@ -0,0 +1,71 @@ +Step-by-Step +============ + +This example load an object detection model converted from [ONNX Model Zoo](https://github.com/onnx/models) and confirm its accuracy and speed based on [MS COCO 2017 dataset](https://cocodataset.org/#download). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Model + +Please refer to [Converting SSDMobilenet To ONNX Tutorial](https://github.com/onnx/tensorflow-onnx/blob/main/tutorials/ConvertingSSDMobilenetToONNX.ipynb) for detailed model converted. The following is a simple example command: + +```shell +wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/ssd-mobilenetv1/model/ssd_mobilenet_v1_12.onnx +``` + +## 3. Prepare Dataset + +Download [MS COCO 2017 dataset](https://cocodataset.org/#download). + +Dataset directories: + +```bash +coco2017 +├── annotations +| ├── instances_val2017.json +| └── ... +├── test2017 +├── train2017 +└── val2017 +``` + +# Run + +## 1. 
Quantization + +Static quantization with QOperator format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --quant_format="QOperator" +``` + +Static quantization with QDQ format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --quant_format="QDQ" +``` + +## 2. Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --mode=performance # or accuracy +``` diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..1e88f8abad8 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
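+#
+# Note: the IDs below follow the original, sparse COCO category numbering,
+# so some values are intentionally skipped (e.g. 12, 26, 29, 30); the
+# label_map.yaml shipped with this ssd_mobilenet_v1 example uses the same
+# numbering.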
+# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..2c57fd61302 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. 
+TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. 
+ """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. 
+ Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
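+ # The block below sorts detections by descending score (stable mergesort),
+ # reorders the per-detection match/ignore flags to that order, and builds
+ # cumulative TP/FP counts per IoU threshold; the resulting precision/recall
+ # values feed either the area-under-PR-curve path (map_points == 0, R == 1)
+ # or the N-point interpolation path further down.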
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py new file mode 100644 index 00000000000..31653c25bc5 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py @@ -0,0 +1,470 @@ +import numpy as np +import collections +from PIL import Image +import os +import yaml +from pycocotools.coco import COCO +import cv2 + +class SequentialSampler(): + def __init__(self, dataset): + self.whole_dataset = dataset + + def __iter__(self): + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running + return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) + + def __len__(self): + return len(self.whole_dataset) + +class BatchSampler(): + def __init__(self, sampler, batch_size, drop_last=True): + if isinstance(drop_last, bool): + self.drop_last = drop_last + else: + raise ValueError("last_batch only support bool as input") + + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + def __iter__(self): + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) > 0 and not self.drop_last: + yield batch + + def __len__(self): + if self.drop_last: + return len(self.sampler) // self.batch_size + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size + +class IndexFetcher(): + def __init__(self, dataset, collate_fn, drop_last): + self.dataset = dataset + self.collate_fn = collate_fn + self.drop_last = drop_last + + def __call__(self, batched_indices): + data = [self.dataset[idx] for idx in batched_indices] + return self.collate_fn(data) + + +def default_collate(batch): + """Merge data with outer dimension batch size.""" + elem = batch[0] + if isinstance(elem, collections.abc.Mapping): + return {key: default_collate([d[key] for d in batch]) for key in elem} + elif isinstance(elem, collections.abc.Sequence): + batch = zip(*batch) + return [default_collate(samples) for samples in batch] + elif isinstance(elem, np.ndarray): + try: + return np.stack(batch) + except: + return batch + else: + return batch + +class COCORawDataloader(): + def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, + sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, + shuffle=False): + self.dataset = dataset + self.last_batch = last_batch + self.sampler = sampler + self.batch_sampler = batch_sampler + self.num_workers = num_workers + self.pin_memory = pin_memory + self.collate_fn = collate_fn + self.batch_size = batch_size + self.shuffle = shuffle + self.drop_last = False if last_batch == 'rollover' else True + if self.collate_fn == None: + self.collate_fn = default_collate + + def __iter__(self): + """Yield data in iterative order.""" + return 
self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + collate_fn=self.collate_fn, + sampler=self.sampler, + batch_sampler=self.batch_sampler, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + shuffle=self.shuffle) + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, + batch_sampler, num_workers, pin_memory, shuffle): + + sampler = self._generate_sampler(dataset) + self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) + self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) + + for batched_indices in self.batch_sampler: + try: + data = self.fetcher(batched_indices) + yield data + except StopIteration: + return + + def _generate_sampler(self, dataset): + if hasattr(dataset, "__getitem__"): + self.dataset_type = 'index' + return SequentialSampler(dataset) + else: + raise ValueError("dataset type only support (index, iter)") + + +class COCORawDataset(): + """Coco raw dataset. + Please arrange data in this way: + /root/img_dir/1.jpg + /root/img_dir/2.jpg + ... + /root/img_dir/n.jpg + /root/anno_dir + Please use Resize transform when batch_size > 1 + Args: root (str): Root directory of dataset. + img_dir (str, default='val2017'): image file directory. + anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + def __init__(self, root, img_dir='val2017', \ + anno_dir='annotations/instances_val2017.json', transform=None, filter=None): + """Initialize the attributes of class.""" + self.batch_size = 1 + self.image_list = [] + self.transform = transform + img_path = os.path.join(root, img_dir) + anno_path = os.path.join(root, anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ + bbox[2]/float(pic_width), bbox[3]/float(pic_height)] + bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) + labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + + with Image.open(img_file) as image: + image = np.array(image.convert('RGB')) + self.image_list.append( + (image, [np.array(bboxes), np.array(labels), np.array([]),\ + np.array(img_detail['file_name'].encode('utf-8'))])) + + def __len__(self): + """Length of the dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Magic method. 
+ x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.image_list[index] + if self.transform is not None: + sample= self.transform(sample) + return sample + +interpolation_map = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, +} + +class ResizeTransform(): + def __init__(self, size, interpolation='bilinear'): + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + if interpolation in interpolation_map.keys(): + self.interpolation = interpolation_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + def __call__(self, sample): + image, label = sample + image = cv2.resize(image, self.size, interpolation=self.interpolation) + if len(image.shape) == 2: + image = np.expand_dims(image, -1) + return (image, label) + +class RescaleTransform(): + """Scale the values of image to [0,1]. + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Scale the values of the image in sample.""" + image, label = sample + if isinstance(image, np.ndarray): + image = image.astype('float32') / 255. + return (image, label) + +class NormalizeTransform(): + def __init__(self, mean=[0.0], std=[1.0]): + self.mean = mean + self.std = std + for item in self.std: + if item < 10**-6: + raise ValueError("Std should be greater than 0") + + def __call__(self, sample): + image, label = sample + assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' + image = (image - self.mean) / self.std + return (image, label) + +class TransposeTransform(): + def __init__(self, perm): + self.perm = perm + + def __call__(self, sample): + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + image = np.transpose(image, axes=self.perm) + return (image, label) + +np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, + 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, + 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, + 'float16': np.float16, 'float64': np.float64, 'bool': bool, + 'string': str, 'complex128': np.complex128, 'int16': np.int16} + +class CastTransform(): + def __init__(self, dtype='float32'): + assert dtype in np_dtype_map.keys(), 'Unknown dtype' + self.dtype = dtype + + def __call__(self, sample): + image, label = sample + image = image.astype(np_dtype_map[self.dtype]) + return (image, label) + +class ComposeTransform(): + def __init__(self, transform_list): + self.transform_list = transform_list + + def __call__(self, sample): + for transform in self.transform_list: + sample = transform(sample) + return sample + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 
101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. + """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) 
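For reference, `update()` expects `predicts` to be the raw list of ONNX Runtime outputs (indexed through `output_index_mapping`) and `labels` to be the collated `(bboxes, str_labels, int_labels, image_ids)` sequence yielded by `COCORawDataloader`. A condensed, hypothetical sketch of how these pieces fit together, assuming the `ssd_mobilenet_v1_12.onnx` file from the README and a local COCO 2017 directory (the real driver is the accompanying `main.py`):

```python
# Sketch under assumptions: model file and COCO 2017 path are placeholders;
# the output index mapping below matches the ssd_mobilenet_v1 example.
import onnxruntime as ort
from data_utils import COCORawDataset, COCORawDataloader, COCOmAPv2

session = ort.InferenceSession('ssd_mobilenet_v1_12.onnx',
                               providers=ort.get_available_providers())
dataset = COCORawDataset('/path/to/coco2017')
dataloader = COCORawDataloader(dataset, batch_size=1)
metric = COCOmAPv2(anno_path='label_map.yaml',
                   output_index_mapping={'boxes': 0, 'classes': 1,
                                         'scores': 2, 'num_detections': 3})

metric.reset()
input_name = session.get_inputs()[0].name
for inputs, labels in dataloader:
    # labels is the collated [bboxes, str_labels, int_labels, image_ids] list
    predictions = session.run(None, {input_name: inputs})
    metric.update(predictions, labels)
print('mAP = %.5f' % metric.result())
```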
+ + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. + """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + +class Post: + def __call__(self, sample): + preds, labels = sample + preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] + return preds, labels + +class LabelBalanceCOCORawFilter(object): + """The label balance filter for COCO raw data.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. + """ + return len(label) == self.size \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/label_map.yaml b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/label_map.yaml new file mode 100644 index 00000000000..0c65e2a9b84 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/label_map.yaml @@ -0,0 +1,80 @@ +person: 1 +bicycle: 2 +car: 3 +motorcycle: 4 +airplane: 5 +bus: 6 +train: 7 +truck: 8 +boat: 9 +traffic light: 10 +fire hydrant: 11 +stop sign: 13 +parking meter: 14 +bench: 15 +bird: 16 +cat: 17 +dog: 18 +horse: 19 +sheep: 20 +cow: 21 +elephant: 22 +bear: 23 +zebra: 24 +giraffe: 25 +backpack: 27 +umbrella: 28 +handbag: 31 +tie: 32 +suitcase: 33 +frisbee: 34 +skis: 35 +snowboard: 36 +sports ball: 37 +kite: 38 +baseball bat: 39 +baseball glove: 40 +skateboard: 41 +surfboard: 42 +tennis racket: 43 +bottle: 44 +wine glass: 46 +cup: 47 +fork: 48 +knife: 49 +spoon: 50 +bowl: 51 +banana: 52 +apple: 53 +sandwich: 54 +orange: 55 +broccoli: 56 +carrot: 57 +hot dog: 58 +pizza: 59 +donut: 60 +cake: 61 +chair: 62 +couch: 63 +potted plant: 64 +bed: 65 +dining table: 67 +toilet: 70 +tv: 72 +laptop: 73 +mouse: 74 +remote: 75 +keyboard: 76 +cell phone: 77 +microwave: 78 +oven: 79 +toaster: 80 +sink: 81 +refrigerator: 82 +book: 84 +clock: 85 +vase: 86 +scissors: 87 +teddy bear: 88 +hair drier: 89 +toothbrush: 90 diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/main.py b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/main.py new file mode 100644 index 00000000000..69e035b0bf6 --- /dev/null +++ 
b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/main.py @@ -0,0 +1,153 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint:disable=redefined-outer-name,logging-format-interpolation + + +import logging +import argparse + +import onnx +import onnxruntime as ort +import numpy as np + +from data_utils import COCORawDataloader, COCORawDataset, COCOmAPv2 +from data_utils import ComposeTransform, ResizeTransform, LabelBalanceCOCORawFilter + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) +logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) +parser.add_argument( + '--model_path', + type=str, + help="Pre-trained model on onnx file" +) +parser.add_argument( + '--data_path', + type=str, + help="path to dataset" +) +parser.add_argument( + '--label_path', + type=str, + default='label_map.yaml', + help="Path of label map yaml file" +) +parser.add_argument( + '--benchmark', + action='store_true', \ + default=False +) +parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether quantize the model" +) +parser.add_argument( + '--config', + type=str, + help="config yaml path" +) +parser.add_argument( + '--output_model', + type=str, + help="output model path" +) +parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" +) +parser.add_argument( + '--quant_format', + type=str, + choices=['QOperator', 'QDQ'], + help="quantization format" +) +parser.add_argument( + '--batch_size', + type=int, + default=1, + help="quantization format" +) +args = parser.parse_args() + +if __name__ == "__main__": + model = onnx.load(args.model_path) + filter = LabelBalanceCOCORawFilter() + eval_dataset = COCORawDataset(args.data_path) + calib_dataset = COCORawDataset(args.data_path, filter=filter) + eval_dataloader = COCORawDataloader(eval_dataset, batch_size=args.batch_size) + calib_dataloader = COCORawDataloader(calib_dataset, 1) + metric = COCOmAPv2(anno_path=args.label_path, output_index_mapping={'boxes': 0, + 'classes': 1, + 'scores': 2, + 'num_detections': 3}) + + def eval_func(model): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(eval_dataloader): + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) 
else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), 'number of input tensors must align with graph inputs' + if isinstance(inputs, dict): + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + metric.update(predictions, labels) + return metric.result() + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=eval_dataloader) + elif args.mode == 'accuracy': + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach='static', + quant_format=args.quant_format) + q_model = quantization.fit(model, config, calib_dataloader=calib_dataloader, eval_func=eval_func) + q_model.save(args.output_model) \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..d92c94766dc --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt @@ -0,0 +1,4 @@ +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.10' +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh new file mode 100755 index 00000000000..f17716e49ce --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [ ! 
$label_path ]; then + label_path='label_map.yaml' + fi + + python main.py \ + --model_path ${input_model} \ + --mode ${mode} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_tuning.sh new file mode 100755 index 00000000000..161010ad4bc --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + if [ ! $label_path ]; then + label_path='label_map.yaml' + fi + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --quant_format ${quant_format} \ + --tune +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/README.md b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/README.md new file mode 100644 index 00000000000..af9b416d712 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/README.md @@ -0,0 +1,62 @@ +Step-by-Step +============ + +This example load an object detection model converted from [ONNX Model Zoo](https://github.com/onnx/models) and confirm its accuracy and speed based on [MS COCO 2017 dataset](https://cocodataset.org/#download). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Model + +Download model from [ONNX Model Zoo](https://github.com/onnx/models) + +```shell +wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/tiny-yolov3/model/tiny-yolov3-12.onnx +``` + +## 3. Prepare Dataset + +Download [MS COCO 2017 dataset](https://cocodataset.org/#download). + +Dataset directories: + +```bash +coco2017 +├── annotations +| ├── instances_val2017.json +| └── ... +├── test2017 +├── train2017 +└── val2017 +``` + +# Run + +## 1. Quantization + +Static quantization with QOperator format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --quant_format="QOperator" +``` + +## 2. 
Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --mode=performance # or accuracy +``` + diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..1e88f8abad8 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..2c57fd61302 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. +TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. 
+ Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. 
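To make the wrapper's calling convention concrete, here is a small, self-contained sketch. The image id, boxes, and score are invented, and the import assumes this `coco_tools.py` is on the path, as it is for `main.py`:

```python
from coco_tools import COCOWrapper, COCOEvalWrapper

# Hypothetical single-image groundtruth in COCO format (bbox is [x, y, width, height]).
groundtruth_dict = {
    'images': [{'id': 1}],
    'categories': [{'id': 1, 'name': 'person'}],
    'annotations': [{'id': 1, 'image_id': 1, 'category_id': 1,
                     'bbox': [20.0, 30.0, 40.0, 50.0], 'area': 2000.0, 'iscrowd': 0}],
}
# One hypothetical detection for the same image.
detections_list = [{'image_id': 1, 'category_id': 1,
                    'bbox': [22.0, 28.0, 38.0, 52.0], 'score': 0.9}]

groundtruth = COCOWrapper(groundtruth_dict)
detections = groundtruth.LoadAnnotations(detections_list)
evaluator = COCOEvalWrapper(groundtruth, detections, agnostic_mode=False,
                            iou_thrs='0.5:0.05:0.95', map_points=101)
summary_metrics, _ = evaluator.ComputeMetrics()
print(summary_metrics['Precision/mAP'])
```

This is the same path that `COCOmAPv2.result()` in `main.py` takes, only with hand-written dictionaries instead of accumulated predictions.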
+ """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
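The interpolation branches in `accumulate()` above are easy to lose inside the nested loops, so here is a standalone numpy sketch of the same 101-point sampling on a toy precision/recall curve (all values invented):

```python
import numpy as np

# Toy PR curve measured at each detection, already sorted by descending score.
rc = np.array([0.1, 0.4, 0.4, 0.7, 0.9])   # recall
pr = np.array([1.0, 0.8, 0.6, 0.7, 0.5])   # precision
rec_thrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)

# Precision envelope: precision at recall r becomes the best precision at any recall >= r.
for i in range(pr.size - 1, 0, -1):
    pr[i - 1] = np.maximum(pr[i - 1], pr[i])

# Sample the envelope at the 101 recall thresholds; beyond the last measured recall it stays 0.
q = np.zeros_like(rec_thrs)
inds = np.searchsorted(rc, rec_thrs, side='left')
valid = inds < len(pr)
q[valid] = pr[inds[valid]]
print(round(float(q.mean()), 4))   # -> 0.6535 for this toy curve
```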
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
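As a concrete illustration of the conversion this helper performs, the following sketch exports one invented 'person' box; the image id and coordinates are placeholders:

```python
import numpy as np
from coco_tools import ExportSingleImageGroundtruthToCoco

# One groundtruth box in [ymin, xmin, ymax, xmax] image coordinates (made up).
boxes = np.array([[30.0, 20.0, 80.0, 60.0]], dtype=np.float32)
classes = np.array([1], dtype=np.int32)   # 1 == 'person' among the COCO category ids

annotations = ExportSingleImageGroundtruthToCoco(
    image_id=1,
    next_annotation_id=1,
    category_id_set={1},
    groundtruth_boxes=boxes,
    groundtruth_classes=classes)
# -> [{'id': 1, 'image_id': 1, 'category_id': 1,
#      'bbox': [20.0, 30.0, 40.0, 50.0], 'area': 2000.0, 'iscrowd': 0}]
```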
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
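And the detection-side counterpart, again with invented values that match the groundtruth sketch above:

```python
import numpy as np
from coco_tools import ExportSingleImageDetectionBoxesToCoco

detections = ExportSingleImageDetectionBoxesToCoco(
    image_id=1,
    category_id_set={1},
    detection_boxes=np.array([[28.0, 22.0, 82.0, 58.0]], dtype=np.float32),  # [ymin, xmin, ymax, xmax]
    detection_scores=np.array([0.9]),
    detection_classes=np.array([1], dtype=np.int32))
# -> [{'image_id': 1, 'category_id': 1, 'bbox': [22.0, 28.0, 36.0, 54.0], 'score': 0.9}]
```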
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/label_map.yaml b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/label_map.yaml new file mode 100644 index 00000000000..f8015ec76b3 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/label_map.yaml @@ -0,0 +1,80 @@ +person: 0 +bicycle: 1 +car: 2 +motorcycle: 3 +airplane: 4 +bus: 5 +train: 6 +truck: 7 +boat: 8 +traffic light: 9 +fire hydrant: 10 +stop sign: 11 +parking meter: 12 +bench: 13 +bird: 14 +cat: 15 +dog: 16 +horse: 17 +sheep: 18 +cow: 19 +elephant: 20 +bear: 21 +zebra: 22 +giraffe: 23 +backpack: 24 +umbrella: 25 +handbag: 26 +tie: 27 +suitcase: 28 +frisbee: 29 +skis: 30 +snowboard: 31 +sports ball: 32 +kite: 33 +baseball bat: 34 +baseball glove: 35 +skateboard: 36 +surfboard: 37 +tennis racket: 38 +bottle: 39 +wine glass: 40 +cup: 41 +fork: 42 +knife: 43 +spoon: 44 +bowl: 45 +banana: 46 +apple: 47 +sandwich: 48 +orange: 49 +broccoli: 50 +carrot: 51 +hot dog: 52 +pizza: 53 +donut: 54 +cake: 55 +chair: 56 +couch: 57 +potted plant: 58 +bed: 59 +dining table: 60 +toilet: 61 +tv: 62 +laptop: 63 +mouse: 64 +remote: 65 +keyboard: 66 +cell phone: 67 +microwave: 68 +oven: 69 +toaster: 70 +sink: 71 +refrigerator: 72 +book: 73 +clock: 74 +vase: 75 +scissors: 76 +teddy bear: 77 +hair drier: 78 +toothbrush: 79 diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/main.py b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/main.py new file mode 100644 index 00000000000..52f8d8e5ce5 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/main.py @@ -0,0 +1,401 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
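The `label_map.yaml` above maps COCO class names to the 0-based indices this example uses; `main.py`'s `COCOmAPv2` metric loads it with `yaml.safe_load` when `--label_path` is supplied. A minimal sketch of that lookup, assuming the file sits in the working directory:

```python
import yaml

with open('label_map.yaml') as f:
    label_map = yaml.safe_load(f)      # {'person': 0, 'bicycle': 1, ..., 'toothbrush': 79}

# name -> index, used to convert the string labels produced by the dataloader
print(label_map['person'], label_map['toothbrush'])   # 0 79
```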
+# pylint:disable=redefined-outer-name,logging-format-interpolation + + +import logging +import argparse + +import onnx +from PIL import Image +import math +import numpy as np +import os +import onnxruntime as ort +import yaml + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) +logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) +parser.add_argument( + '--data_path', + type=str, + help="Path of COCO dataset, it contains val2017 and annotations subfolder" +) +parser.add_argument( + '--label_path', + type=str, + default='label_map.yaml', + help="Path of label map yaml file" +) +parser.add_argument( + '--model_path', + type=str, + help="Pre-trained model on onnx file" +) +parser.add_argument( + '--benchmark', + action='store_true', \ + default=False +) +parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether quantize the model" +) +parser.add_argument( + '--config', + type=str, + help="config yaml path" +) +parser.add_argument( + '--output_model', + type=str, + help="output model path" +) +parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" +) +parser.add_argument( + '--quant_format', + type=str, + choices=['QOperator', 'QDQ'], + help="quantization format" +) +parser.add_argument( + '--batch_size', + type=int, + default=1, + help="quantization format" +) +args = parser.parse_args() + +class Dataloader: + def __init__(self, root, batch_size=1, size=416, img_dir='val2017', \ + anno_dir='annotations/instances_val2017.json'): + import json + import os + import numpy as np + from pycocotools.coco import COCO + from coco_label_map import category_map + self.batch_size = batch_size + self.image_list = [] + self.model_image_size = (size, size) + img_path = os.path.join(root, img_dir) + anno_path = os.path.join(root, anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[2]+bbox[0]]) + labels.append(category_map[ann['category_id']].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + label = [np.array([bboxes]), np.array([labels]), np.zeros((1,0)), np.array([img_detail['file_name'].encode('utf-8')])] + with Image.open(img_file) as image: + image = image.convert('RGB') + data, label = self.preprocess((image, label)) + self.image_list.append((data, label)) + + def __iter__(self): + for item in self.image_list: + yield item + + def letterbox_image(self, image, size): + '''resize image with unchanged aspect ratio using padding''' + iw, ih = image.size + w, h = size + scale = min(w/iw, h/ih) + nw = int(iw*scale) + nh = int(ih*scale) + + image = image.resize((nw,nh), Image.BICUBIC) + new_image = Image.new('RGB', size, 
(128,128,128)) + new_image.paste(image, ((w-nw)//2, (h-nh)//2)) + return new_image + + def preprocess(self, sample): + image, label = sample + boxed_image = self.letterbox_image(image, tuple(reversed(self.model_image_size))) + image_data = np.array(boxed_image, dtype='float32') + image_data /= 255. + image_data = np.transpose(image_data, [2, 0, 1]) + image_data = np.expand_dims(image_data, 0) + image_size = np.array([image.size[1], image.size[0]], dtype=np.float32).reshape(1, 2) + return (image_data, image_size), label + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
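For reference, the `Dataloader` above letterboxes every image to the 416x416 model input before inference. The same preprocessing as a standalone sketch (the image path is a placeholder):

```python
import numpy as np
from PIL import Image

def letterbox(image, size=(416, 416)):
    """Resize with unchanged aspect ratio, padding the borders with gray (128)."""
    iw, ih = image.size
    w, h = size
    scale = min(w / iw, h / ih)
    nw, nh = int(iw * scale), int(ih * scale)
    canvas = Image.new('RGB', size, (128, 128, 128))
    canvas.paste(image.resize((nw, nh), Image.BICUBIC), ((w - nw) // 2, (h - nh) // 2))
    return canvas

img = Image.open('some_image.jpg').convert('RGB')        # placeholder path
data = np.array(letterbox(img), dtype='float32') / 255.  # HWC, scaled to [0, 1]
data = np.expand_dims(np.transpose(data, [2, 0, 1]), 0)  # 1 x 3 x 416 x 416
image_size = np.array([img.size[1], img.size[0]], dtype=np.float32).reshape(1, 2)
```

Both arrays are fed to the model together, matching the `(image_data, image_size)` tuple that `preprocess()` returns.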
+ """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + +class Post: + def __call__(self, sample): + preds, labels = sample + boxes, scores, indices = preds + out_boxes, out_scores, out_classes = [], [], [] + if len(indices) == 0 or len(indices[0]) == 0: + return ([np.zeros((0,4))], [[]], [[]]), labels + for idx_ in indices[0]: + out_classes.append(idx_[1]) + out_scores.append(scores[tuple(idx_)]) + idx_1 = (idx_[0], idx_[2]) + out_boxes.append(boxes[idx_1]) + return ([out_boxes], [out_scores], [out_classes]), labels + +if __name__ == "__main__": + model = onnx.load(args.model_path) + dataloader = Dataloader(args.data_path, batch_size=args.batch_size) + metric = COCOmAPv2(anno_path=args.label_path, output_index_mapping={'boxes':0, 'scores':1, 'classes':2}) + postprocess = Post() + + def eval_func(model): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), 'number of input tensors must align with graph inputs' + if isinstance(inputs, dict): + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + return metric.result() + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == 'accuracy': + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + + if args.tune: + from neural_compressor import quantization + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig + accuracy_criterion = AccuracyCriterion() + accuracy_criterion.absolute = 0.02 + config = PostTrainingQuantConfig(approach='static', + quant_format=args.quant_format, + 
calibration_sampling_size=[1], + accuracy_criterion=accuracy_criterion) + q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval_func) + q_model.save(args.output_model) \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/requirements.txt b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..b0063c5dc44 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/requirements.txt @@ -0,0 +1,4 @@ +onnx +onnxruntime +onnxruntime_extensions; python_version < '3.10' +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_benchmark.sh new file mode 100644 index 00000000000..f17716e49ce --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [ ! $label_path ]; then + label_path='label_map.yaml' + fi + + python main.py \ + --model_path ${input_model} \ + --mode ${mode} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..161010ad4bc --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + if [ ! 
$label_path ]; then + label_path='label_map.yaml' + fi + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --quant_format ${quant_format} \ + --tune +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/README.md b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/README.md new file mode 100644 index 00000000000..5f833d87e62 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/README.md @@ -0,0 +1,61 @@ +Step-by-Step +============ + +This example load an object detection model converted from [ONNX Model Zoo](https://github.com/onnx/models) and confirm its accuracy and speed based on [MS COCO 2017 dataset](https://cocodataset.org/#download). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Model + +Download model from [ONNX Model Zoo](https://github.com/onnx/models) + +```shell +wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/yolov3/model/yolov3-12.onnx +``` + +## 3. Prepare Dataset + +Download [MS COCO 2017 dataset](https://cocodataset.org/#download). + +Dataset directories: + +```bash +coco2017 +├── annotations +| ├── instances_val2017.json +| └── ... +├── test2017 +├── train2017 +└── val2017 +``` + +# Run + +## 1. Quantization + +Static quantization with QOperator format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --quant_format="QOperator" +``` + +## 2. Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --label_path=label_map.yaml \ + --mode=performance # or accuracy +``` diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..1e88f8abad8 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
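`coco_label_map.py` below carries the 1-based COCO `category_map`; when no label-map YAML is passed, the `COCOmAPv2` metric in these examples simply inverts it instead. A short sketch of that fallback (dict excerpted):

```python
category_map = {1: 'person', 2: 'bicycle', 3: 'car'}            # excerpt of the dict below
category_map_reverse = {v: k for k, v in category_map.items()}  # {'person': 1, 'bicycle': 2, 'car': 3}
```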
+# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..2c57fd61302 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. 
+TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. 
+ """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. 
+ Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
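+                    # The block below sorts detections by score (stable mergesort, matching
+                    # the reference Matlab implementation), accumulates cumulative TP/FP
+                    # counts, and then fills the precision/recall/score arrays according to
+                    # `map_points`: interpolated precision at each recall threshold
+                    # (101-point or 11-point) or, when R == 1, the exact area under the
+                    # PR curve evaluated at the recall change points.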
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/label_map.yaml b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/label_map.yaml new file mode 100644 index 00000000000..f8015ec76b3 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/label_map.yaml @@ -0,0 +1,80 @@ +person: 0 +bicycle: 1 +car: 2 +motorcycle: 3 +airplane: 4 +bus: 5 +train: 6 +truck: 7 +boat: 8 +traffic light: 9 +fire hydrant: 10 +stop sign: 11 +parking meter: 12 +bench: 13 +bird: 14 +cat: 15 +dog: 16 +horse: 17 +sheep: 18 +cow: 19 +elephant: 20 +bear: 21 +zebra: 22 +giraffe: 23 +backpack: 24 +umbrella: 25 +handbag: 26 +tie: 27 +suitcase: 28 +frisbee: 29 +skis: 30 +snowboard: 31 +sports ball: 32 +kite: 33 +baseball bat: 34 +baseball glove: 35 +skateboard: 36 +surfboard: 37 +tennis racket: 38 +bottle: 39 +wine glass: 40 +cup: 41 +fork: 42 +knife: 43 +spoon: 44 +bowl: 45 +banana: 46 +apple: 47 +sandwich: 48 +orange: 49 +broccoli: 50 +carrot: 51 +hot dog: 52 +pizza: 53 +donut: 54 +cake: 55 +chair: 56 +couch: 57 +potted plant: 58 +bed: 59 +dining table: 60 +toilet: 61 +tv: 62 +laptop: 63 +mouse: 64 +remote: 65 +keyboard: 66 +cell phone: 67 +microwave: 68 +oven: 69 +toaster: 70 +sink: 71 +refrigerator: 72 +book: 73 +clock: 74 +vase: 75 +scissors: 76 +teddy bear: 77 +hair drier: 78 +toothbrush: 79 diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/main.py b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/main.py new file mode 100644 index 00000000000..932b0a435d8 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/main.py @@ -0,0 +1,403 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
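The helpers above in `coco_tools.py` (the per-image export functions plus `COCOWrapper`/`COCOEvalWrapper`) are what the `COCOmAPv2` metric in `main.py` below strings together at evaluation time. For orientation, here is a minimal, self-contained sketch of that composition; the image id, boxes, score, and two-category set are toy values invented purely for illustration, and the script assumes it runs next to `coco_tools.py`:

```python
# Minimal sketch (illustrative values): export one ground truth and one matching
# detection to COCO format, then score them with the wrappers from coco_tools.py.
import numpy as np

from coco_tools import (COCOEvalWrapper, COCOWrapper,
                        ExportSingleImageDetectionBoxesToCoco,
                        ExportSingleImageGroundtruthToCoco)

category_id_set = {1, 2}   # toy subset of valid COCO category ids
image_id = 1               # single toy image

# Boxes use the internal [ymin, xmin, ymax, xmax] convention, in pixels.
gt_list = ExportSingleImageGroundtruthToCoco(
    image_id=image_id,
    next_annotation_id=1,
    category_id_set=category_id_set,
    groundtruth_boxes=np.array([[10., 20., 110., 220.]], dtype=np.float32),
    groundtruth_classes=np.array([1]))

dt_list = ExportSingleImageDetectionBoxesToCoco(
    image_id=image_id,
    category_id_set=category_id_set,
    detection_boxes=np.array([[10., 20., 110., 220.]], dtype=np.float32),
    detection_scores=np.array([0.9], dtype=np.float32),
    detection_classes=np.array([1]))

groundtruth_dict = {
    'annotations': gt_list,
    'images': [{'id': image_id}],
    'categories': [{'id': 1, 'name': 'person'}, {'id': 2, 'name': 'bicycle'}],
}
groundtruth = COCOWrapper(groundtruth_dict)
detections = groundtruth.LoadAnnotations(dt_list)
evaluator = COCOEvalWrapper(groundtruth, detections, agnostic_mode=False,
                            iou_thrs='0.5:0.05:0.95', map_points=101)
summary, _ = evaluator.ComputeMetrics()
print(summary['Precision/mAP'])   # ~1.0 for this perfect toy match
```

In `main.py`, the same composition happens inside `COCOmAPv2.result()`, with `update()` collecting the per-image ground truths and detections during evaluation.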
+# pylint:disable=redefined-outer-name,logging-format-interpolation + + +import logging +import argparse + +import onnx +from PIL import Image +import math +import numpy as np +import os +import onnxruntime as ort +import yaml + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) +logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) +parser.add_argument( + '--data_path', + type=str, + help="Path of COCO dataset, it contains val2017 and annotations subfolder" +) +parser.add_argument( + '--label_path', + type=str, + default='label_map.yaml', + help="Path of label map yaml file" +) +parser.add_argument( + '--model_path', + type=str, + help="Pre-trained model on onnx file" +) +parser.add_argument( + '--benchmark', + action='store_true', \ + default=False +) +parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether quantize the model" +) +parser.add_argument( + '--config', + type=str, + help="config yaml path" +) +parser.add_argument( + '--output_model', + type=str, + help="output model path" +) +parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" +) +parser.add_argument( + '--quant_format', + type=str, + choices=['QOperator', 'QDQ'], + help="quantization format" +) +parser.add_argument( + '--batch_size', + type=int, + default=1, + help="quantization format" +) +args = parser.parse_args() + +class Dataloader: + def __init__(self, root, batch_size=1, size=416, img_dir='val2017', \ + anno_dir='annotations/instances_val2017.json'): + import json + import os + import numpy as np + from pycocotools.coco import COCO + from coco_label_map import category_map + self.batch_size = batch_size + self.image_list = [] + self.model_image_size = (size, size) + img_path = os.path.join(root, img_dir) + anno_path = os.path.join(root, anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[2]+bbox[0]]) + labels.append(category_map[ann['category_id']].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + label = [np.array([bboxes]), np.array([labels]), np.zeros((1,0)), np.array([img_detail['file_name'].encode('utf-8')])] + with Image.open(img_file) as image: + image = image.convert('RGB') + data, label = self.preprocess((image, label)) + self.image_list.append((data, label)) + + def __iter__(self): + for item in self.image_list: + yield item + + def letterbox_image(self, image, size): + '''resize image with unchanged aspect ratio using padding''' + iw, ih = image.size + w, h = size + scale = min(w/iw, h/ih) + nw = int(iw*scale) + nh = int(ih*scale) + + image = image.resize((nw,nh), Image.BICUBIC) + new_image = Image.new('RGB', size, 
(128,128,128)) + new_image.paste(image, ((w-nw)//2, (h-nh)//2)) + return new_image + + def preprocess(self, sample): + image, label = sample + boxed_image = self.letterbox_image(image, tuple(reversed(self.model_image_size))) + image_data = np.array(boxed_image, dtype='float32') + image_data /= 255. + image_data = np.transpose(image_data, [2, 0, 1]) + image_data = np.expand_dims(image_data, 0) + image_size = np.array([image.size[1], image.size[0]], dtype=np.float32).reshape(1, 2) + return (image_data, image_size), label + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
+ """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + +class Post: + def __call__(self, sample): + preds, labels = sample + boxes, scores, indices = preds + out_boxes, out_scores, out_classes = [], [], [] + if len(indices) == 0: + return ([np.zeros((0,4))], [[]], [[]]), labels + for idx_ in indices: + out_classes.append(idx_[1]) + out_scores.append(scores[tuple(idx_)]) + idx_1 = (idx_[0], idx_[2]) + out_boxes.append(boxes[idx_1]) + return ([out_boxes], [out_scores], [out_classes]), labels + +if __name__ == "__main__": + model = onnx.load(args.model_path) + dataloader = Dataloader(args.data_path, batch_size=args.batch_size) + metric = COCOmAPv2(anno_path=args.label_path, output_index_mapping={'boxes':0, 'scores':1, 'classes':2}) + postprocess = Post() + + def eval_func(model): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), 'number of input tensors must align with graph inputs' + if isinstance(inputs, dict): + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + return metric.result() + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == 'accuracy': + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + from neural_compressor import quantization + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig + accuracy_criterion = AccuracyCriterion() + accuracy_criterion.absolute = 0.02 + config = PostTrainingQuantConfig(approach='static', + quant_format=args.quant_format, + 
accuracy_criterion=accuracy_criterion, + recipes={'first_conv_or_matmul_quantization': False, + 'last_conv_or_matmul_quantization': False, + 'pre_post_process_quantization': False}) + q_model = quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval_func) + q_model.save(args.output_model) + \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/requirements.txt b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..d92c94766dc --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/requirements.txt @@ -0,0 +1,4 @@ +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.10' +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_benchmark.sh new file mode 100644 index 00000000000..f17716e49ce --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [ ! $label_path ]; then + label_path='label_map.yaml' + fi + + python main.py \ + --model_path ${input_model} \ + --mode ${mode} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..161010ad4bc --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + if [ ! 
$label_path ]; then
+        label_path='label_map.yaml'
+    fi
+
+    python main.py \
+            --model_path ${input_model} \
+            --output_model ${output_model} \
+            --data_path ${dataset_location} \
+            --label_path ${label_path} \
+            --quant_format ${quant_format} \
+            --tune
+}
+
+main "$@"
\ No newline at end of file
diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/README.md b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/README.md
new file mode 100644
index 00000000000..38f33ae5467
--- /dev/null
+++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/README.md
@@ -0,0 +1,61 @@
+Step-by-Step
+============
+
+This example loads an object detection model converted from [ONNX Model Zoo](https://github.com/onnx/models) and confirms its accuracy and speed based on the [MS COCO 2017 dataset](https://cocodataset.org/#download).
+
+# Prerequisite
+
+## 1. Environment
+
+```shell
+pip install neural-compressor
+pip install -r requirements.txt
+```
+> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+## 2. Prepare Model
+
+Download the model from [ONNX Model Zoo](https://github.com/onnx/models):
+
+```shell
+wget https://github.com/onnx/models/raw/main/vision/object_detection_segmentation/yolov4/model/yolov4.onnx
+```
+
+## 3. Prepare Dataset
+
+Download [MS COCO 2017 dataset](https://cocodataset.org/#download).
+
+Dataset directories:
+
+```bash
+coco2017
+├── annotations
+|   ├── instances_val2017.json
+|   └── ...
+├── test2017
+├── train2017
+└── val2017
+```
+
+# Run
+
+## 1. Quantization
+
+Static quantization with QOperator format:
+
+```bash
+bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx
+                   --output_model=path/to/save \ # model path as *.onnx
+                   --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders
+                   --label_path=label_map.yaml \
+                   --quant_format="QOperator"
+```
+
+## 2. Benchmark
+
+```bash
+bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx
+                      --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders
+                      --label_path=label_map.yaml \
+                      --mode=performance # or accuracy
+```
diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_label_map.py
new file mode 100644
index 00000000000..1e88f8abad8
--- /dev/null
+++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_label_map.py
@@ -0,0 +1,103 @@
+#
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
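Each example ships two label mappings: `label_map.yaml` (shown for yolov3 above and passed via `--label_path` here) maps class names to the contiguous 0-79 indices the model emits, while `category_map` in `coco_label_map.py` maps the sparse 1-90 COCO category ids back to names. A small sketch of translating between the two, assuming it runs inside one of these example directories (values shown are illustrative):

```python
# Sketch: relate the two label maps shipped with these examples (illustrative only).
import yaml

from coco_label_map import category_map   # sparse COCO ids (1-90, with gaps) -> names

# label_map.yaml maps class names -> the model's contiguous 0-79 class indices.
with open('label_map.yaml') as f:
    label_map = yaml.safe_load(f)          # e.g. {'person': 0, 'bicycle': 1, ...}

index_to_name = {idx: name for name, idx in label_map.items()}
name_to_coco_id = {name: cid for cid, name in category_map.items()}

# Translate a model class index into the COCO category id pycocotools expects.
print(name_to_coco_id[index_to_name[0]])   # 'person' -> 1
```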
+# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..2c57fd61302 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. 
+TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. 
+ """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. 
+ Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
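+                    # As in the yolov3 copy of this file: sort detections with a stable
+                    # mergesort, accumulate TP/FP, then either interpolate precision at the
+                    # configured recall thresholds or integrate the PR curve when R == 1.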
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/label_map.yaml b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/label_map.yaml new file mode 100644 index 00000000000..1fbc9263dc9 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/label_map.yaml @@ -0,0 +1,80 @@ +person: 1 +bicycle: 2 +car: 3 +motorcycle: 4 +airplane: 5 +bus: 6 +train: 7 +truck: 8 +boat: 9 +traffic light: 10 +fire hydrant: 11 +stop sign: 12 +parking meter: 13 +bench: 14 +bird: 15 +cat: 16 +dog: 17 +horse: 18 +sheep: 19 +cow: 20 +elephant: 21 +bear: 22 +zebra: 23 +giraffe: 24 +backpack: 25 +umbrella: 26 +handbag: 27 +tie: 28 +suitcase: 29 +frisbee: 30 +skis: 31 +snowboard: 32 +sports ball: 33 +kite: 34 +baseball bat: 35 +baseball glove: 36 +skateboard: 37 +surfboard: 38 +tennis racket: 39 +bottle: 40 +wine glass: 41 +cup: 42 +fork: 43 +knife: 44 +spoon: 45 +bowl: 46 +banana: 47 +apple: 48 +sandwich: 49 +orange: 50 +broccoli: 51 +carrot: 52 +hot dog: 53 +pizza: 54 +donut: 55 +cake: 56 +chair: 57 +couch: 58 +potted plant: 59 +bed: 60 +dining table: 61 +toilet: 62 +tv: 63 +laptop: 64 +mouse: 65 +remote: 66 +keyboard: 67 +cell phone: 68 +microwave: 69 +oven: 70 +toaster: 71 +sink: 72 +refrigerator: 73 +book: 74 +clock: 75 +vase: 76 +scissors: 77 +teddy bear: 78 +hair drier: 79 +toothbrush: 80 diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/main.py b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/main.py new file mode 100644 index 00000000000..9fa1281553d --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/main.py @@ -0,0 +1,546 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint:disable=redefined-outer-name,logging-format-interpolation + +import cv2 +import onnx +import logging +import argparse +import numpy as np +from PIL import Image +from scipy import special +import os +import onnxruntime as ort +import yaml + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) +logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) +parser.add_argument( + '--data_path', + type=str, + help="Path of COCO dataset, it contains val2017 and annotations subfolder" +) +parser.add_argument( + '--label_path', + type=str, + default='label_map.yaml', + help="Path of label map yaml file" +) +parser.add_argument( + '--model_path', + type=str, + help="Pre-trained model on onnx file" +) +parser.add_argument( + '--benchmark', + action='store_true', \ + default=False +) +parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether quantize the model" +) +parser.add_argument( + '--config', + type=str, + help="config yaml path" +) +parser.add_argument( + '--output_model', + type=str, + help="output model path" +) +parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" +) +parser.add_argument( + '--quant_format', + type=str, + choices=['QOperator', 'QDQ'], + help="quantization format" +) +parser.add_argument( + '--batch_size', + type=int, + default=1, + help="quantization format" +) +args = parser.parse_args() + +def get_anchors(anchors_path, tiny=False): + '''loads the anchors from a file''' + with open(anchors_path) as f: + anchors = f.readline() + anchors = np.array(anchors.split(','), dtype=np.float32) + return anchors.reshape(3, 3, 2) + +IMAGE_INPUT_SZIE = 416 +ANCHORS = get_anchors("./yolov4_anchors.txt") +STRIDES = np.array([8, 16, 32]) +XYSCALE = [1.2, 1.1, 1.05] + +class Dataloader: + def __init__(self, root, batch_size=1, img_dir='val2017', \ + anno_dir='annotations/instances_val2017.json', filter=None): + import json + import os + import numpy as np + from pycocotools.coco import COCO + from coco_label_map import category_map + self.batch_size = batch_size + self.image_list = [] + img_path = os.path.join(root, img_dir) + anno_path = os.path.join(root, anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bboxes.append([bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]]) + labels.append(category_map[ann['category_id']].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + label = [np.array([bboxes]), np.array([labels]), np.zeros((1,0)), np.array([img_detail['file_name'].encode('utf-8')])] + with Image.open(img_file) as image: + image = image.convert('RGB') + image, label = self.preprocess((image, label)) + self.image_list.append((image, label)) + + def 
__iter__(self): + for item in self.image_list: + yield item + + def preprocess(self, sample): + image, label = sample + image = np.array(image) + ih = iw = IMAGE_INPUT_SZIE + h, w, _ = image.shape + + scale = min(iw/w, ih/h) + nw, nh = int(scale * w), int(scale * h) + image_resized = cv2.resize(image, (nw, nh)) + + image_padded = np.full(shape=[ih, iw, 3], fill_value=128.0) + dw, dh = (iw - nw) // 2, (ih-nh) // 2 + image_padded[dh:nh+dh, dw:nw+dw, :] = image_resized + image_padded = image_padded / 255. + + gt_boxes, str_labels, int_labels, image_ids = label + return image_padded[np.newaxis, ...].astype(np.float32), \ + (gt_boxes, str_labels, int_labels, image_ids, (h, w)) + +class Post: + def __init__(self) -> None: + self.ANCHORS = ANCHORS + self.STRIDES = STRIDES + self.XYSCALE = XYSCALE + self.input_size = IMAGE_INPUT_SZIE + + def __call__(self, sample): + preds, labels = sample + labels = labels[0] + + pred_bbox = postprocess_bbbox(preds, self.ANCHORS, self.STRIDES, self.XYSCALE) + bboxes = postprocess_boxes(pred_bbox, labels[4], self.input_size, 0.25) + if len(bboxes) == 0: + return (np.zeros((1,0,4)), np.zeros((1,0)), np.zeros((1,0))), labels[:4] + bboxes_ = np.array(nms(bboxes, 0.63, method='nms')) + bboxes, scores, classes = bboxes_[:, :4], bboxes_[:, 4], bboxes_[:, 5] + + bboxes = np.reshape(bboxes, (1, -1, 4)) + classes = np.reshape(classes, (1, -1)).astype('int64') + 1 + scores = np.reshape(scores, (1, -1)) + return (bboxes, classes, scores), labels[:4] + +def postprocess_bbbox(pred_bbox, ANCHORS, STRIDES, XYSCALE=[1,1,1]): + '''define anchor boxes''' + for i, pred in enumerate(pred_bbox): + conv_shape = pred.shape + output_size = conv_shape[1] + conv_raw_dxdy = pred[:, :, :, :, 0:2] + conv_raw_dwdh = pred[:, :, :, :, 2:4] + xy_grid = np.meshgrid(np.arange(output_size), np.arange(output_size)) + xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2) + + xy_grid = np.tile(np.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1]) + xy_grid = xy_grid.astype(np.float32) + + pred_xy = ((special.expit(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * STRIDES[i] + pred_wh = (np.exp(conv_raw_dwdh) * ANCHORS[i]) + pred[:, :, :, :, 0:4] = np.concatenate([pred_xy, pred_wh], axis=-1) + + pred_bbox = [np.reshape(x, (-1, np.shape(x)[-1])) for x in pred_bbox] + pred_bbox = np.concatenate(pred_bbox, axis=0) + return pred_bbox + +def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold): + '''remove boundary boxs with a low detection probability''' + valid_scale=[0, np.inf] + pred_bbox = np.array(pred_bbox) + + pred_xywh = pred_bbox[:, 0:4] + pred_conf = pred_bbox[:, 4] + pred_prob = pred_bbox[:, 5:] + + # # (1) (x, y, w, h) --> (xmin, ymin, xmax, ymax) + pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5, + pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1) + # # (2) (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org) + org_h, org_w = org_img_shape + resize_ratio = min(input_size / org_w, input_size / org_h) + + dw = (input_size - resize_ratio * org_w) / 2 + dh = (input_size - resize_ratio * org_h) / 2 + + pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio + pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio + + # # (3) clip some boxes that are out of range + pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]), + np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1) + invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 
3])) + pred_coor[invalid_mask] = 0 + + # # (4) discard some invalid boxes + bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1)) + scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1])) + + # # (5) discard some boxes with low scores + classes = np.argmax(pred_prob, axis=-1) + scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes] + score_mask = scores > score_threshold + mask = np.logical_and(scale_mask, score_mask) + coors, scores, classes = pred_coor[mask], scores[mask], classes[mask] + return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1) + +def bboxes_iou(boxes1, boxes2): + '''calculate the Intersection Over Union value''' + boxes1 = np.array(boxes1) + boxes2 = np.array(boxes2) + + boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) + boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) + + left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) + right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) + + inter_section = np.maximum(right_down - left_up, 0.0) + inter_area = inter_section[..., 0] * inter_section[..., 1] + union_area = boxes1_area + boxes2_area - inter_area + ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps) + + return ious + +def nms(bboxes, iou_threshold, sigma=0.3, method='nms'): + """ + :param bboxes: (xmin, ymin, xmax, ymax, score, class) + Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf + https://github.com/bharatsingh430/soft-nms + """ + classes_in_img = list(set(bboxes[:, 5])) + best_bboxes = [] + + for cls in classes_in_img: + cls_mask = (bboxes[:, 5] == cls) + cls_bboxes = bboxes[cls_mask] + + while len(cls_bboxes) > 0: + max_ind = np.argmax(cls_bboxes[:, 4]) + best_bbox = cls_bboxes[max_ind] + best_bboxes.append(best_bbox) + cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]]) + iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4]) + weight = np.ones((len(iou),), dtype=np.float32) + + assert method in ['nms', 'soft-nms'] + + if method == 'nms': + iou_mask = iou > iou_threshold + weight[iou_mask] = 0.0 + + if method == 'soft-nms': + weight = np.exp(-(1.0 * iou ** 2 / sigma)) + + cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight + score_mask = cls_bboxes[:, 4] > 0. + cls_bboxes = cls_bboxes[score_mask] + + return best_bboxes + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. 
+ """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. + """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + logger.warning("Sample num during evaluation is 0.") + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + +class AccuracyLoss: + def __init__(self, loss=0.01): + self._loss = loss + + @property + def absolute(self): + return self._loss + + @absolute.setter + def absolute(self, absolute): + if isinstance(absolute, float): + self._loss = absolute + +if __name__ == "__main__": + model = onnx.load(args.model_path) + dataloader = Dataloader(args.data_path, batch_size=args.batch_size) + metric = COCOmAPv2(anno_path=args.label_path, output_index_mapping={'boxes':0, 'scores':2, 'classes':1}) + postprocess = Post() + + def eval_func(model): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(dataloader): + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), 'number of input tensors must align with graph inputs' + if isinstance(inputs, dict): + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + return metric.result() + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == 'accuracy': + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + from neural_compressor import quantization + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig + accuracy_criterion = AccuracyCriterion() + accuracy_criterion.absolute = 0.02 + config = PostTrainingQuantConfig(approach='static', + quant_format=args.quant_format, + calibration_sampling_size=[1], + accuracy_criterion=accuracy_criterion, + recipes={'first_conv_or_matmul_quantization': False, + 'last_conv_or_matmul_quantization': False, + 'pre_post_process_quantization': False}) + q_model = 
quantization.fit(model, config, calib_dataloader=dataloader, eval_func=eval_func) + q_model.save(args.output_model) \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/requirements.txt b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..b0063c5dc44 --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/requirements.txt @@ -0,0 +1,4 @@ +onnx +onnxruntime +onnxruntime_extensions; python_version < '3.10' +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_benchmark.sh new file mode 100644 index 00000000000..f17716e49ce --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + if [ ! $label_path ]; then + label_path='label_map.yaml' + fi + + python main.py \ + --model_path ${input_model} \ + --mode ${mode} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..c4079a31f5d --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,51 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --label_path=*) + label_path=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + if [ ! 
$label_path ]; then + label_path='label_map.yaml' + fi + + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --data_path ${dataset_location} \ + --label_path ${label_path} \ + --quant_format ${quant_format} \ + --tune +} + +main "$@" \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/yolov4_anchors.txt b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/yolov4_anchors.txt new file mode 100644 index 00000000000..d518a0b6fac --- /dev/null +++ b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/yolov4_anchors.txt @@ -0,0 +1 @@ +12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401 diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md new file mode 100644 index 00000000000..3f312bb706b --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md @@ -0,0 +1,71 @@ +Step-by-Step +============ + +This example load an object detection model converted from Tensorflow and confirm its accuracy and speed based on [MS COCO 2017 dataset](https://cocodataset.org/#download). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Model +Please refer to [Converting SSDMobilenet To ONNX Tutorial](https://github.com/onnx/tensorflow-onnx/blob/master/tutorials/ConvertingSSDMobilenetToONNX.ipynb) for detailed model converted. The following is a simple example command: + +```shell +export MODEL=ssd_mobilenet_v1_coco_2018_01_28 +wget http://download.tensorflow.org/models/object_detection/$MODEL.tar.gz +tar -xvf $MODEL.tar.gz + +python -m tf2onnx.convert --graphdef $MODEL/frozen_inference_graph.pb --output ./$MODEL.onnx --fold_const --opset 11 --inputs image_tensor:0 --outputs num_detections:0,detection_boxes:0,detection_scores:0,detection_classes:0 +``` + +## 3. Prepare Dataset + +Download [MS COCO 2017 dataset](https://cocodataset.org/#download). + +Dataset directories: + +```bash +coco2017 +├── annotations +| ├── instances_val2017.json +| └── ... +├── test2017 +├── train2017 +└── val2017 +``` + +# Run + +## 1. Quantization + +Static quantization with QOperator format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --quant_format="QOperator" +``` + +Static quantization with QDQ format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --quant_format="QDQ" +``` + +## 2. 
Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --mode=performance # or accuracy +``` diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..1e88f8abad8 --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..2c57fd61302 --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. +TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. 
+ Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. 
+ """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py new file mode 100644 index 00000000000..31653c25bc5 --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py @@ -0,0 +1,470 @@ +import numpy as np +import collections +from PIL import Image +import os +import yaml +from pycocotools.coco import COCO +import cv2 + +class SequentialSampler(): + def __init__(self, dataset): + self.whole_dataset = dataset + + def __iter__(self): + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running + return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) + + def __len__(self): + return len(self.whole_dataset) + +class BatchSampler(): + def __init__(self, sampler, batch_size, drop_last=True): + if isinstance(drop_last, bool): + self.drop_last = drop_last + else: + raise ValueError("last_batch only support bool as input") + + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + def __iter__(self): + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) > 0 and not self.drop_last: + yield batch + + def __len__(self): + if self.drop_last: + return len(self.sampler) // self.batch_size + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size + +class IndexFetcher(): + def __init__(self, dataset, collate_fn, drop_last): + self.dataset = dataset + self.collate_fn = collate_fn + self.drop_last = drop_last + + def __call__(self, batched_indices): + data = [self.dataset[idx] for idx in batched_indices] + return self.collate_fn(data) + + +def default_collate(batch): + """Merge data with outer dimension batch size.""" + elem = batch[0] + if isinstance(elem, collections.abc.Mapping): + return {key: default_collate([d[key] for d in batch]) for key in elem} + elif isinstance(elem, collections.abc.Sequence): + batch = zip(*batch) + return [default_collate(samples) for samples in batch] + elif isinstance(elem, np.ndarray): + try: + return np.stack(batch) + except: + return batch + else: + return batch + +class COCORawDataloader(): + def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, + sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, + shuffle=False): + self.dataset = dataset + self.last_batch = last_batch + self.sampler = sampler + self.batch_sampler = batch_sampler + self.num_workers = num_workers + self.pin_memory = pin_memory + self.collate_fn = collate_fn + self.batch_size = batch_size + self.shuffle = shuffle + self.drop_last = False if last_batch == 'rollover' else True + if self.collate_fn == None: + self.collate_fn = default_collate + + def __iter__(self): + """Yield data in iterative order.""" + return self._generate_dataloader( + self.dataset, + 
batch_size=self.batch_size, + last_batch=self.last_batch, + collate_fn=self.collate_fn, + sampler=self.sampler, + batch_sampler=self.batch_sampler, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + shuffle=self.shuffle) + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, + batch_sampler, num_workers, pin_memory, shuffle): + + sampler = self._generate_sampler(dataset) + self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) + self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) + + for batched_indices in self.batch_sampler: + try: + data = self.fetcher(batched_indices) + yield data + except StopIteration: + return + + def _generate_sampler(self, dataset): + if hasattr(dataset, "__getitem__"): + self.dataset_type = 'index' + return SequentialSampler(dataset) + else: + raise ValueError("dataset type only support (index, iter)") + + +class COCORawDataset(): + """Coco raw dataset. + Please arrange data in this way: + /root/img_dir/1.jpg + /root/img_dir/2.jpg + ... + /root/img_dir/n.jpg + /root/anno_dir + Please use Resize transform when batch_size > 1 + Args: root (str): Root directory of dataset. + img_dir (str, default='val2017'): image file directory. + anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + def __init__(self, root, img_dir='val2017', \ + anno_dir='annotations/instances_val2017.json', transform=None, filter=None): + """Initialize the attributes of class.""" + self.batch_size = 1 + self.image_list = [] + self.transform = transform + img_path = os.path.join(root, img_dir) + anno_path = os.path.join(root, anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ + bbox[2]/float(pic_width), bbox[3]/float(pic_height)] + bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) + labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + + with Image.open(img_file) as image: + image = np.array(image.convert('RGB')) + self.image_list.append( + (image, [np.array(bboxes), np.array(labels), np.array([]),\ + np.array(img_detail['file_name'].encode('utf-8'))])) + + def __len__(self): + """Length of the dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Magic method. 
+ x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.image_list[index] + if self.transform is not None: + sample= self.transform(sample) + return sample + +interpolation_map = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, +} + +class ResizeTransform(): + def __init__(self, size, interpolation='bilinear'): + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + if interpolation in interpolation_map.keys(): + self.interpolation = interpolation_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + def __call__(self, sample): + image, label = sample + image = cv2.resize(image, self.size, interpolation=self.interpolation) + if len(image.shape) == 2: + image = np.expand_dims(image, -1) + return (image, label) + +class RescaleTransform(): + """Scale the values of image to [0,1]. + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Scale the values of the image in sample.""" + image, label = sample + if isinstance(image, np.ndarray): + image = image.astype('float32') / 255. + return (image, label) + +class NormalizeTransform(): + def __init__(self, mean=[0.0], std=[1.0]): + self.mean = mean + self.std = std + for item in self.std: + if item < 10**-6: + raise ValueError("Std should be greater than 0") + + def __call__(self, sample): + image, label = sample + assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' + image = (image - self.mean) / self.std + return (image, label) + +class TransposeTransform(): + def __init__(self, perm): + self.perm = perm + + def __call__(self, sample): + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + image = np.transpose(image, axes=self.perm) + return (image, label) + +np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, + 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, + 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, + 'float16': np.float16, 'float64': np.float64, 'bool': bool, + 'string': str, 'complex128': np.complex128, 'int16': np.int16} + +class CastTransform(): + def __init__(self, dtype='float32'): + assert dtype in np_dtype_map.keys(), 'Unknown dtype' + self.dtype = dtype + + def __call__(self, sample): + image, label = sample + image = image.astype(np_dtype_map[self.dtype]) + return (image, label) + +class ComposeTransform(): + def __init__(self, transform_list): + self.transform_list = transform_list + + def __call__(self, sample): + for transform in self.transform_list: + sample = transform(sample) + return sample + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 
101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. + """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) 
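
The `update()` method above takes the raw ONNX Runtime outputs plus the labels collated by `COCORawDataloader` and converts both into COCO-format annotation lists. A minimal sketch of one update/result cycle, assuming it is run from this example directory (so `data_utils.py`, `coco_tools.py` and `coco_label_map.py` are importable) and using made-up box values and file name:

```python
import numpy as np
from data_utils import COCOmAPv2

# Same output-index mapping as main.py: the SSD model returns
# (num_detections, boxes, scores, classes), each with a leading batch dimension.
metric = COCOmAPv2(output_index_mapping={'num_detections': 0,
                                         'boxes': 1,
                                         'scores': 2,
                                         'classes': 3})

predicts = [
    np.array([2.0]),                                   # num_detections, shape (batch,)
    np.array([[[0.1, 0.1, 0.5, 0.5],
               [0.2, 0.2, 0.6, 0.6]]]),                # boxes [ymin, xmin, ymax, xmax]
    np.array([[0.9, 0.8]]),                            # scores
    np.array([[1.0, 1.0]]),                            # classes (1 == 'person')
]
labels = (
    np.array([[[0.1, 0.1, 0.5, 0.5]]]),                # ground-truth boxes
    np.array([[b'person']]),                           # string labels
    np.array([[]]),                                    # integer labels (unused here)
    np.array([b'000000000001.jpg']),                   # image ids
)

metric.update(predicts, labels)
print(metric.result())   # DetectionBoxes_Precision/mAP for this toy batch
```

`result()` (defined just below) assembles these lists into the COCO structures expected by `COCOWrapper`/`COCOEvalWrapper` and returns the mAP value.
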
+ + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. + """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + +class Post: + def __call__(self, sample): + preds, labels = sample + preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] + return preds, labels + +class LabelBalanceCOCORawFilter(object): + """The label balance filter for COCO raw data.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. + """ + return len(label) == self.size \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py new file mode 100644 index 00000000000..d5c2c7b1810 --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py @@ -0,0 +1,151 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
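+#
+# This script evaluates an SSD MobileNet v1 ONNX model on COCO 2017 with
+# ONNX Runtime and, when run with --tune, applies accuracy-aware post-training
+# static quantization through the neural_compressor API. See the readme in
+# this directory for the full command lines.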
+# pylint:disable=redefined-outer-name,logging-format-interpolation + +import logging +import argparse + +import onnx +import onnxruntime as ort +import numpy as np + +from data_utils import COCORawDataloader, COCORawDataset, COCOmAPv2 +from data_utils import ComposeTransform, ResizeTransform, LabelBalanceCOCORawFilter + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) +logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) +parser.add_argument( + '--model_path', + type=str, + help="Pre-trained model on onnx file" +) +parser.add_argument( + '--data_path', + type=str, + help="path to dataset" +) +parser.add_argument( + '--benchmark', + action='store_true', \ + default=False +) +parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether quantize the model" +) +parser.add_argument( + '--config', + type=str, + help="config yaml path" +) +parser.add_argument( + '--output_model', + type=str, + help="output model path" +) +parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" +) +parser.add_argument( + '--quant_format', + type=str, + choices=['QOperator', 'QDQ'], + help="quantization format" +) +parser.add_argument( + '--batch_size', + type=int, + default=16, + help="quantization format" +) +args = parser.parse_args() + +if __name__ == "__main__": + model = onnx.load(args.model_path) + transform = ComposeTransform([ResizeTransform(size=300)]) + filter = LabelBalanceCOCORawFilter() + eval_dataset = COCORawDataset(args.data_path, transform=transform) + calib_dataset = COCORawDataset(args.data_path, transform=transform, filter=filter) + eval_dataloader = COCORawDataloader(eval_dataset, batch_size=args.batch_size) + calib_dataloader = COCORawDataloader(calib_dataset, 1) + metric = COCOmAPv2(output_index_mapping={'num_detections': 0, + 'boxes': 1, + 'scores': 2, + 'classes': 3}) + + def eval_func(model): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(eval_dataloader): + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), 'number of input tensors must align with graph inputs' + if isinstance(inputs, dict): + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + metric.update(predictions, labels) + return metric.result() + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=eval_dataloader) + elif args.mode == 'accuracy': + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: 
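+        # Accuracy-aware post-training static quantization: quantization.fit()
+        # below tunes the quantization configuration until the accuracy measured
+        # by eval_func stays within the 0.01 absolute drop allowed by
+        # accuracy_criterion, calibrating on 50 samples from calib_dataloader.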
+        from neural_compressor import quantization
+        from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig
+        accuracy_criterion = AccuracyCriterion()
+        accuracy_criterion.absolute = 0.01
+        config = PostTrainingQuantConfig(approach='static',
+                                         accuracy_criterion=accuracy_criterion,
+                                         quant_format=args.quant_format,
+                                         calibration_sampling_size=[50])
+        q_model = quantization.fit(model, config, calib_dataloader=calib_dataloader, eval_func=eval_func)
+        q_model.save(args.output_model)
\ No newline at end of file
diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/readme.md b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/readme.md
new file mode 100644
index 00000000000..5deda5a02d1
--- /dev/null
+++ b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/readme.md
@@ -0,0 +1,47 @@
+# Evaluate performance of ONNX Runtime (SSD Mobilenet v1)
+> ONNX Runtime quantization is under active development. Please use 1.6.0+ to get more quantization support.
+
+This example loads an object detection model converted from TensorFlow and confirms its accuracy and speed on the [MS COCO 2017 dataset](https://cocodataset.org/#download). You need to download this dataset yourself.
+
+### Environment
+onnx: 1.9.0
+onnxruntime: 1.10.0
+
+### Prepare model
+Please refer to the [Converting SSDMobilenet To ONNX Tutorial](https://github.com/onnx/tensorflow-onnx/blob/master/tutorials/ConvertingSSDMobilenetToONNX.ipynb) for details on model conversion. The following is a simple example command:
+
+```shell
+export MODEL=ssd_mobilenet_v1_coco_2018_01_28
+wget http://download.tensorflow.org/models/object_detection/$MODEL.tar.gz
+tar -xvf $MODEL.tar.gz
+
+python -m tf2onnx.convert --graphdef $MODEL/frozen_inference_graph.pb --output ./$MODEL.onnx --fold_const --opset 11 --inputs image_tensor:0 --outputs num_detections:0,detection_boxes:0,detection_scores:0,detection_classes:0
+```
+
+### Quantization
+
+Quantize the model with the QOperator format (QLinearOps):
+
+```bash
+bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx
+                   --output_model=path/to/save \
+                   --dataset_location=path/to/coco/dataset \
+                   --quant_format="QOperator"
+```
+
+Quantize the model with the QDQ format:
+
+```bash
+bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx
+                   --output_model=path/to/save \
+                   --dataset_location=path/to/coco/dataset \
+                   --quant_format="QDQ"
+```
+
+### Benchmark
+
+```bash
+bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx
+                      --dataset_location=path/to/coco/dataset \
+                      --mode=performance # or accuracy
+```
diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt
new file mode 100644
index 00000000000..59037141de2
--- /dev/null
+++ b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt
@@ -0,0 +1,6 @@
+onnx
+onnxruntime
+torch
+torchvision
+onnxruntime-extensions; python_version < '3.10'
+pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/run_benchmark.sh b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh
similarity index 98%
rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/run_benchmark.sh
rename to
examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh index 30b977b471c..fc6745cc65d 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq/run_benchmark.sh +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh @@ -39,7 +39,6 @@ function run_benchmark { --data_path ${dataset_location} \ --batch_size ${batch_size} \ --benchmark - } main "$@" diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..e6f6075fd4c --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,43 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --data_path ${dataset_location} \ + --quant_format ${quant_format} \ + --tune +} + +main "$@" diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/README.md b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/README.md new file mode 100644 index 00000000000..d783b83ea06 --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/README.md @@ -0,0 +1,72 @@ +Step-by-Step +============ + +This example load an object detection model converted from Tensorflow and confirm its accuracy and speed based on [MS COCO 2017 dataset](https://cocodataset.org/#download). + +# Prerequisite + +## 1. Environment + +```shell +pip install neural-compressor +pip install -r requirements.txt +``` +> Note: Validated ONNX Runtime [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Model + +Please refer to [Converting SSDMobilenet To ONNX Tutorial](https://github.com/onnx/tensorflow-onnx/blob/master/tutorials/ConvertingSSDMobilenetToONNX.ipynb) for detailed model converted. The following is a simple example command: + +```shell +export MODEL=ssd_mobilenet_v2_coco_2018_03_29 +wget http://download.tensorflow.org/models/object_detection/$MODEL.tar.gz +tar -xvf $MODEL.tar.gz + +python -m tf2onnx.convert --graphdef $MODEL/frozen_inference_graph.pb --output ./$MODEL.onnx --fold_const --opset 11 --inputs image_tensor:0 --outputs num_detections:0,detection_boxes:0,detection_scores:0,detection_classes:0 +``` + +## 3. Prepare Dataset + +Download [MS COCO 2017 dataset](https://cocodataset.org/#download). + +Dataset directories: + +```bash +coco2017 +├── annotations +| ├── instances_val2017.json +| └── ... +├── test2017 +├── train2017 +└── val2017 +``` + +# Run + +## 1. 
Quantization + +Static quantization with QOperator format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --quant_format="QOperator" +``` + +Static quantization with QDQ format: + +```bash +bash run_tuning.sh --input_model=path/to/model \ # model path as *.onnx + --output_model=path/to/save \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --quant_format="QDQ" +``` + +## 2. Benchmark + +```bash +bash run_benchmark.sh --input_model=path/to/model \ # model path as *.onnx + --dataset_location=path/to/coco2017 \ # dataset path containing 'val2017' and 'annotations' folders + --mode=performance # or accuracy +``` diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..1e88f8abad8 --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
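
The `category_map` that follows is the standard COCO 80-class id-to-name table (the ids are non-contiguous because some COCO ids are unused). The metric code inverts it to translate the byte-string class names stored with each sample back into category ids; a minimal sketch of that lookup, mirroring what `COCOmAPv2.__init__` in the example's `data_utils.py` does when no annotation path is given:

```python
from coco_label_map import category_map

# name -> id, as built by COCOmAPv2 when anno_path is None
category_map_reverse = {v: k for k, v in category_map.items()}

print(category_map_reverse['person'])      # 1
print(category_map_reverse['toothbrush'])  # 90
```
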
+# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..2c57fd61302 --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. 
+TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. 
+ """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. 
+ Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
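+                    # Detections are ordered by descending score so that the
+                    # cumulative TP/FP sums below sweep out the PR curve, which
+                    # is then interpolated according to map_points (101-point,
+                    # 11-point, or area under the curve).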
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
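As a concrete illustration of the groundtruth export contract, here is a toy call with two boxes in absolute pixel coordinates. It assumes the coco_tools.py added by this patch is importable from the working directory; the image id, boxes and category ids are all invented.

```python
import numpy as np

from coco_tools import ExportSingleImageGroundtruthToCoco  # module added above

gt_boxes = np.array([[10., 20., 110., 220.],     # [ymin, xmin, ymax, xmax] in pixels
                     [50., 40., 300., 400.]], dtype=np.float32)
gt_classes = np.array([1, 18], dtype=np.int32)   # e.g. COCO ids for person / dog

annotations = ExportSingleImageGroundtruthToCoco(
    image_id='000000397133.jpg',
    next_annotation_id=1,
    category_id_set={1, 18},
    groundtruth_boxes=gt_boxes,
    groundtruth_classes=gt_classes)

# Each entry carries 'id', 'image_id', 'category_id', 'bbox', 'area' and 'iscrowd'.
print(annotations[0]['bbox'])  # [20.0, 10.0, 200.0, 100.0]  (COCO xywh)
print(annotations[0]['area'])  # 20000.0
```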
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/data_utils.py b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/data_utils.py new file mode 100644 index 00000000000..31653c25bc5 --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/data_utils.py @@ -0,0 +1,470 @@ +import numpy as np +import collections +from PIL import Image +import os +import yaml +from pycocotools.coco import COCO +import cv2 + +class SequentialSampler(): + def __init__(self, dataset): + self.whole_dataset = dataset + + def __iter__(self): + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running + return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) + + def __len__(self): + return len(self.whole_dataset) + +class BatchSampler(): + def __init__(self, sampler, batch_size, drop_last=True): + if isinstance(drop_last, bool): + self.drop_last = drop_last + else: + raise ValueError("last_batch only support bool as input") + + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + def __iter__(self): + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) > 0 and not self.drop_last: + yield batch + + def __len__(self): + if self.drop_last: + return len(self.sampler) // self.batch_size + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size + +class IndexFetcher(): + def __init__(self, dataset, collate_fn, drop_last): + self.dataset = dataset + self.collate_fn = collate_fn + self.drop_last = drop_last + + def __call__(self, batched_indices): + data = [self.dataset[idx] for idx in batched_indices] + return self.collate_fn(data) + + +def default_collate(batch): + """Merge data with outer dimension batch size.""" + elem = batch[0] + if isinstance(elem, collections.abc.Mapping): + return {key: default_collate([d[key] for d in batch]) for key in elem} + elif isinstance(elem, collections.abc.Sequence): + batch = zip(*batch) + return [default_collate(samples) for samples in batch] + elif isinstance(elem, np.ndarray): + try: + return np.stack(batch) + except: + return batch + else: + return batch + +class COCORawDataloader(): + def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, + sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, + shuffle=False): + self.dataset = dataset + self.last_batch = last_batch + self.sampler = sampler + self.batch_sampler = batch_sampler + self.num_workers = num_workers + self.pin_memory = pin_memory + self.collate_fn = collate_fn + self.batch_size = batch_size + self.shuffle = shuffle + self.drop_last = False if last_batch == 'rollover' else True + if self.collate_fn == None: + self.collate_fn = default_collate + + def __iter__(self): + """Yield data in iterative order.""" + return self._generate_dataloader( + self.dataset, + 
batch_size=self.batch_size, + last_batch=self.last_batch, + collate_fn=self.collate_fn, + sampler=self.sampler, + batch_sampler=self.batch_sampler, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + shuffle=self.shuffle) + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, + batch_sampler, num_workers, pin_memory, shuffle): + + sampler = self._generate_sampler(dataset) + self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) + self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) + + for batched_indices in self.batch_sampler: + try: + data = self.fetcher(batched_indices) + yield data + except StopIteration: + return + + def _generate_sampler(self, dataset): + if hasattr(dataset, "__getitem__"): + self.dataset_type = 'index' + return SequentialSampler(dataset) + else: + raise ValueError("dataset type only support (index, iter)") + + +class COCORawDataset(): + """Coco raw dataset. + Please arrange data in this way: + /root/img_dir/1.jpg + /root/img_dir/2.jpg + ... + /root/img_dir/n.jpg + /root/anno_dir + Please use Resize transform when batch_size > 1 + Args: root (str): Root directory of dataset. + img_dir (str, default='val2017'): image file directory. + anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. + """ + + def __init__(self, root, img_dir='val2017', \ + anno_dir='annotations/instances_val2017.json', transform=None, filter=None): + """Initialize the attributes of class.""" + self.batch_size = 1 + self.image_list = [] + self.transform = transform + img_path = os.path.join(root, img_dir) + anno_path = os.path.join(root, anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ + bbox[2]/float(pic_width), bbox[3]/float(pic_height)] + bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) + labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + + with Image.open(img_file) as image: + image = np.array(image.convert('RGB')) + self.image_list.append( + (image, [np.array(bboxes), np.array(labels), np.array([]),\ + np.array(img_detail['file_name'].encode('utf-8'))])) + + def __len__(self): + """Length of the dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Magic method. 
+ x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.image_list[index] + if self.transform is not None: + sample= self.transform(sample) + return sample + +interpolation_map = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, +} + +class ResizeTransform(): + def __init__(self, size, interpolation='bilinear'): + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + if interpolation in interpolation_map.keys(): + self.interpolation = interpolation_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + def __call__(self, sample): + image, label = sample + image = cv2.resize(image, self.size, interpolation=self.interpolation) + if len(image.shape) == 2: + image = np.expand_dims(image, -1) + return (image, label) + +class RescaleTransform(): + """Scale the values of image to [0,1]. + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Scale the values of the image in sample.""" + image, label = sample + if isinstance(image, np.ndarray): + image = image.astype('float32') / 255. + return (image, label) + +class NormalizeTransform(): + def __init__(self, mean=[0.0], std=[1.0]): + self.mean = mean + self.std = std + for item in self.std: + if item < 10**-6: + raise ValueError("Std should be greater than 0") + + def __call__(self, sample): + image, label = sample + assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' + image = (image - self.mean) / self.std + return (image, label) + +class TransposeTransform(): + def __init__(self, perm): + self.perm = perm + + def __call__(self, sample): + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + image = np.transpose(image, axes=self.perm) + return (image, label) + +np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, + 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, + 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, + 'float16': np.float16, 'float64': np.float64, 'bool': bool, + 'string': str, 'complex128': np.complex128, 'int16': np.int16} + +class CastTransform(): + def __init__(self, dtype='float32'): + assert dtype in np_dtype_map.keys(), 'Unknown dtype' + self.dtype = dtype + + def __call__(self, sample): + image, label = sample + image = image.astype(np_dtype_map[self.dtype]) + return (image, label) + +class ComposeTransform(): + def __init__(self, transform_list): + self.transform_list = transform_list + + def __call__(self, sample): + for transform in self.transform_list: + sample = transform(sample) + return sample + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 
101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. + """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) 
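update() expects a fairly specific nesting for `predicts` and `labels`, so a toy call may be easier to follow than prose. The sketch below assumes data_utils.py, coco_tools.py and coco_label_map.py from this patch are importable, uses the output ordering that main.py configures for the SSD-MobileNet model, and fabricates every tensor value.

```python
import numpy as np

from data_utils import COCOmAPv2  # defined above; pulls in coco_label_map / coco_tools

metric = COCOmAPv2(output_index_mapping={'num_detections': 0, 'boxes': 1,
                                         'scores': 2, 'classes': 3})

# One image, two fake detections. Boxes are [ymin, xmin, ymax, xmax], normalized to [0, 1].
predicts = [
    np.array([2.0]),                                       # num_detections, shape [batch]
    np.array([[[0.1, 0.2, 0.5, 0.6],
               [0.3, 0.3, 0.9, 0.8]]], dtype=np.float32),  # boxes,   shape [batch, N, 4]
    np.array([[0.9, 0.6]], dtype=np.float32),              # scores,  shape [batch, N]
    np.array([[1.0, 18.0]], dtype=np.float32),             # classes, shape [batch, N]
]

# Labels follow the COCORawDataset layout after collation:
# (boxes, str_labels, int_labels, image_ids), with names and ids stored as bytes.
labels = (
    np.array([[[0.1, 0.2, 0.5, 0.6],
               [0.3, 0.3, 0.9, 0.8]]], dtype=np.float32),  # groundtruth boxes per image
    np.array([[b'person', b'dog']]),                       # class names per image
    np.array([[]]),                                        # int labels unused here
    np.array([b'000000397133.jpg']),                       # image ids per image
)

metric.update(predicts, labels)
print(metric.result())  # mAP over everything accumulated so far
```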
+ + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. + """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + +class Post: + def __call__(self, sample): + preds, labels = sample + preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] + return preds, labels + +class LabelBalanceCOCORawFilter(object): + """The label balance filter for COCO raw data.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. + """ + return len(label) == self.size \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/main.py b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/main.py new file mode 100644 index 00000000000..fd22c6390a7 --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/main.py @@ -0,0 +1,152 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
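Before the quantization driver below, one detail that is easy to miss: the calibration dataset is built with LabelBalanceCOCORawFilter, so only images whose number of groundtruth boxes equals `size` survive into the calibration set. A tiny sketch of that behaviour (assumes data_utils.py is importable; the box values are made up):

```python
from data_utils import LabelBalanceCOCORawFilter

keep_single_object = LabelBalanceCOCORawFilter(size=1)  # the default used by main.py

# COCORawDataset calls filter(None, bboxes) and skips the image when it returns False.
print(keep_single_object(None, [[0.1, 0.2, 0.5, 0.6]]))     # True  -> image kept
print(keep_single_object(None, [[0.1, 0.2, 0.5, 0.6],
                                [0.3, 0.3, 0.9, 0.8]]))     # False -> image dropped
```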
+# pylint:disable=redefined-outer-name,logging-format-interpolation + + +import logging +import argparse + +import onnx +import onnxruntime as ort +import numpy as np + +from data_utils import COCORawDataloader, COCORawDataset, COCOmAPv2 +from data_utils import ComposeTransform, ResizeTransform, LabelBalanceCOCORawFilter + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) +logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") +parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter +) +parser.add_argument( + '--model_path', + type=str, + help="Pre-trained model on onnx file" +) +parser.add_argument( + '--data_path', + type=str, + help="path to dataset" +) +parser.add_argument( + '--benchmark', + action='store_true', \ + default=False +) +parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether quantize the model" +) +parser.add_argument( + '--config', + type=str, + help="config yaml path" +) +parser.add_argument( + '--output_model', + type=str, + help="output model path" +) +parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" +) +parser.add_argument( + '--quant_format', + type=str, + choices=['QOperator', 'QDQ'], + help="quantization format" +) +parser.add_argument( + '--batch_size', + type=int, + default=16, + help="quantization format" +) +args = parser.parse_args() + +if __name__ == "__main__": + model = onnx.load(args.model_path) + transform = ComposeTransform([ResizeTransform(size=300)]) + filter = LabelBalanceCOCORawFilter() + eval_dataset = COCORawDataset(args.data_path, transform=transform) + calib_dataset = COCORawDataset(args.data_path, transform=transform, filter=filter) + eval_dataloader = COCORawDataloader(eval_dataset, batch_size=args.batch_size) + calib_dataloader = COCORawDataloader(calib_dataset, 1) + metric = COCOmAPv2(output_index_mapping={'num_detections': 0, + 'boxes': 1, + 'scores': 2, + 'classes': 3}) + + def eval_func(model): + metric.reset() + session = ort.InferenceSession(model.SerializeToString(), + providers=ort.get_available_providers()) + ort_inputs = {} + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + for idx, (inputs, labels) in enumerate(eval_dataloader): + if not isinstance(labels, list): + labels = [labels] + if len_inputs == 1: + ort_inputs.update( + inputs if isinstance(inputs, dict) else {inputs_names[0]: inputs} + ) + else: + assert len_inputs == len(inputs), 'number of input tensors must align with graph inputs' + if isinstance(inputs, dict): + ort_inputs.update(inputs) + else: + for i in range(len_inputs): + if not isinstance(inputs[i], np.ndarray): + ort_inputs.update({inputs_names[i]: np.array(inputs[i])}) + else: + ort_inputs.update({inputs_names[i]: inputs[i]}) + predictions = session.run(None, ort_inputs) + metric.update(predictions, labels) + return metric.result() + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + fit(model, conf, b_dataloader=eval_dataloader) + elif args.mode == 'accuracy': + acc_result = eval_func(model) + print("Batch size = %d" % args.batch_size) + print("Accuracy: %.5f" % acc_result) + + if 
args.tune: + from neural_compressor import quantization + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig + accuracy_criterion = AccuracyCriterion() + accuracy_criterion.absolute = 0.01 + config = PostTrainingQuantConfig(approach='static', + accuracy_criterion=accuracy_criterion, + quant_format=args.quant_format, + calibration_sampling_size=[50]) + q_model = quantization.fit(model, config, calib_dataloader=calib_dataloader, eval_func=eval_func) + q_model.save(args.output_model) \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/requirements.txt b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/requirements.txt new file mode 100644 index 00000000000..59037141de2 --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/requirements.txt @@ -0,0 +1,6 @@ +onnx +onnxruntime +torch +torchvision +onnxruntime-extensions; python_version < '3.10' +pillow>=8.2.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_benchmark.sh b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_benchmark.sh new file mode 100644 index 00000000000..fc6745cc65d --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_benchmark.sh @@ -0,0 +1,44 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --mode=*) + mode=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --batch_size=*) + batch_size=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_benchmark +function run_benchmark { + + python main.py \ + --model_path ${input_model} \ + --mode ${mode} \ + --data_path ${dataset_location} \ + --batch_size ${batch_size} \ + --benchmark +} + +main "$@" diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_tuning.sh b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_tuning.sh new file mode 100644 index 00000000000..e6f6075fd4c --- /dev/null +++ b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_tuning.sh @@ -0,0 +1,43 @@ +#!/bin/bash +set -x + +function main { + init_params "$@" + run_tuning + +} + +# init params +function init_params { + + for var in "$@" + do + case $var in + --input_model=*) + input_model=$(echo $var |cut -f2 -d=) + ;; + --output_model=*) + output_model=$(echo $var |cut -f2 -d=) + ;; + --dataset_location=*) + dataset_location=$(echo $var |cut -f2 -d=) + ;; + --quant_format=*) + quant_format=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + +# run_tuning +function run_tuning { + python main.py \ + --model_path ${input_model} \ + --output_model ${output_model} \ + --data_path ${dataset_location} \ + --quant_format ${quant_format} \ + --tune +} + +main "$@" diff --git a/neural_compressor/adaptor/onnxrt.py b/neural_compressor/adaptor/onnxrt.py index a2b957ab1cd..38e3c246148 100644 --- a/neural_compressor/adaptor/onnxrt.py +++ b/neural_compressor/adaptor/onnxrt.py @@ -689,8 +689,9 @@ def _detect_domain(self, model): is_nlp = True break - # 4. according to LSTM structure - if "LSTM" in [node.op_type for node in model.model.graph.node]: + # 4. 
according to LSTM/Attention optype + op_types = [node.op_type for node in model.model.graph.node] + if "LSTM" in op_types or 'Attention' in op_types: is_nlp = True logger.warning("The model is automatically detected as {} model. " diff --git a/neural_compressor/config.py b/neural_compressor/config.py index bd97d6f9a99..2c13a9dfff9 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -521,7 +521,7 @@ def graph_optimization_level(val=None): return check_value("graph_optimization_level", val, str, ["DISABLE_ALL", "ENABLE_BASIC", "ENABLE_EXTENDED", "ENABLE_ALL"]) else: - return "ENABLE_BASIC" + return None def first_conv_or_matmul_quantization(val=None): if val is not None: diff --git a/neural_compressor/model/onnx_model.py b/neural_compressor/model/onnx_model.py index f87c6a88adc..37c9f6e7ba1 100644 --- a/neural_compressor/model/onnx_model.py +++ b/neural_compressor/model/onnx_model.py @@ -232,10 +232,11 @@ def _get_input_name_to_nodes(self, nodes): for attr in attrs: self._get_input_name_to_nodes(attr.g.node) for input_name in node.input: - if input_name not in self._input_name_to_nodes: - self._input_name_to_nodes[input_name] = [node] - else: - self._input_name_to_nodes[input_name].append(node) + if len(input_name.strip()) != 0: + if input_name not in self._input_name_to_nodes: + self._input_name_to_nodes[input_name] = [node] + else: + self._input_name_to_nodes[input_name].append(node) @property def output_name_to_node(self): @@ -251,7 +252,8 @@ def _get_output_name_to_node(self, nodes): for attr in attrs: self._get_output_name_to_node(attr.g.node) for output_name in node.output: - self._output_name_to_node[output_name] = node + if len(output_name.strip()) != 0: + self._output_name_to_node[output_name] = node def get_siblings(self, node): """Get siblings nodes.""" @@ -424,12 +426,14 @@ def topological_sort(self, enable_subgraph=False): output_name_to_node = {} for node in self.model.graph.node: for input_name in node.input: - if input_name not in input_name_to_nodes: - input_name_to_nodes[input_name] = [node] - else: - input_name_to_nodes[input_name].append(node) + if len(input_name.strip()) != 0: + if input_name not in input_name_to_nodes: + input_name_to_nodes[input_name] = [node] + else: + input_name_to_nodes[input_name].append(node) for output_name in node.output: - output_name_to_node[output_name] = node + if len(output_name.strip()) != 0: + output_name_to_node[output_name] = node else: # pragma: no cover input_name_to_nodes = self._input_name_to_nodes output_name_to_node = self._output_name_to_node diff --git a/test/config/test_config.py b/test/config/test_config.py index ea80fd4b149..326710dce6f 100644 --- a/test/config/test_config.py +++ b/test/config/test_config.py @@ -17,7 +17,7 @@ def test_config(self): self.assertEqual(config.recipes['weight_correction'], False) self.assertEqual(config.recipes['dedicated_qdq_pair'], False) self.assertEqual(config.recipes['add_qdq_pair_to_weight'], False) - self.assertEqual(config.recipes['graph_optimization_level'], 'ENABLE_BASIC') + self.assertEqual(config.recipes['graph_optimization_level'], None) class TestPyConf(unittest.TestCase): def test_config(self):
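Beyond the new example code, the library changes at the end of this patch are small but behavioural: NLP models are now also auto-detected through an Attention op, the graph_optimization_level recipe no longer defaults to ENABLE_BASIC, and the name maps in onnx_model.py skip empty tensor names. The last point matters because ONNX marks omitted optional inputs and outputs with the empty string, which previously showed up as a bogus '' key. A small self-contained sketch of that guard (the Resize node is hypothetical; only the onnx package is required):

```python
from onnx import helper

# A Resize node with its optional 'roi' and 'scales' inputs omitted (empty names).
node = helper.make_node('Resize', inputs=['X', '', '', 'sizes'], outputs=['Y'])

input_name_to_nodes = {}
for input_name in node.input:
    if len(input_name.strip()) != 0:  # the guard added in onnx_model.py
        input_name_to_nodes.setdefault(input_name, []).append(node)

print(sorted(input_name_to_nodes))  # ['X', 'sizes'] -- no placeholder '' entry
```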