From efd737956161af23c9cc14d25952063e63e7deda Mon Sep 17 00:00:00 2001 From: Chen Xi Date: Thu, 2 Mar 2023 10:57:17 +0800 Subject: [PATCH] support keras functional model and tuning fallback layers (#521) Signed-off-by: Clark Chin Co-authored-by: chensuyue Co-authored-by: Lv, Liang1 --- examples/.config/model_params_keras.json | 39 + examples/README.md | 8 +- .../imagenet_prepare/build_imagenet_data.py | 567 + .../download_and_convert_imagenet.sh | 100 + .../imagenet_prepare/download_imagenet.sh | 99 + .../imagenet_lsvrc_2015_synsets.txt | 1000 + .../imagenet_prepare/imagenet_metadata.txt | 21842 ++++++++++++++++ .../{ => image_recognition}/mnist/README.md | 0 .../{ => image_recognition}/mnist/mnist.py | 0 .../mnist/requirements.txt | 0 .../mobilenet_v2/quantization/ptq/README.md | 0 .../mobilenet_v2}/quantization/ptq/main.py | 39 +- .../quantization/ptq/prepare_model.py | 0 .../quantization/ptq/run_benchmark.sh | 0 .../quantization/ptq/run_tuning.sh | 0 .../image_recognition/prepare_dataset.sh | 71 + .../resnetv2_101/quantization/ptq/README.md | 0 .../resnetv2_101}/quantization/ptq/main.py | 52 +- .../quantization/ptq/prepare_model.py | 0 .../quantization/ptq/run_benchmark.sh | 0 .../quantization/ptq/run_tuning.sh | 0 .../resnetv2_50/quantization/ptq/README.md | 41 +- .../resnetv2_50}/quantization/ptq/main.py | 92 +- .../quantization/ptq/prepare_model.py | 0 .../quantization/ptq/run_benchmark.sh | 0 .../quantization/ptq/run_tuning.sh | 0 .../xception/quantization/ptq/README.md | 62 + .../xception}/quantization/ptq/main.py | 300 +- .../quantization/ptq/prepare_model.py | 0 .../quantization/ptq/run_benchmark.sh | 0 .../xception/quantization/ptq/run_tuning.sh | 0 .../quantization/ptq/README.md | 170 +- .../quantization/ptq/main.py | 300 +- .../quantization/ptq/prepare_model.py | 70 +- .../inception_v3/quantization/ptq/README.md | 172 +- .../inception_v3/quantization/ptq/main.py | 284 +- .../quantization/ptq/prepare_model.py | 70 +- .../resnet101/quantization/ptq/README.md | 168 +- .../resnet101/quantization/ptq/main.py | 284 +- .../quantization/ptq/prepare_model.py | 70 +- .../xception/quantization/ptq/README.md | 82 - neural_compressor/adaptor/keras.py | 207 +- .../adaptor/keras_utils/conv2d.py | 76 + .../adaptor/keras_utils/dense.py | 72 + .../adaptor/keras_utils/quantizer.py | 182 +- neural_compressor/model/keras_model.py | 75 +- neural_compressor/model/model.py | 4 +- test/itex/test_keras_in_keras_out.py | 2 +- 48 files changed, 25228 insertions(+), 1372 deletions(-) create mode 100644 examples/.config/model_params_keras.json create mode 100644 examples/keras/image_recognition/imagenet_prepare/build_imagenet_data.py create mode 100644 examples/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh create mode 100644 examples/keras/image_recognition/imagenet_prepare/download_imagenet.sh create mode 100644 examples/keras/image_recognition/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt create mode 100644 examples/keras/image_recognition/imagenet_prepare/imagenet_metadata.txt rename examples/keras/{ => image_recognition}/mnist/README.md (100%) rename examples/keras/{ => image_recognition}/mnist/mnist.py (100%) rename examples/keras/{ => image_recognition}/mnist/requirements.txt (100%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/mobilenet_v2/quantization/ptq/README.md (100%) rename examples/{tensorflow/image_recognition/keras_models/resnetv2_50 => keras/image_recognition/mobilenet_v2}/quantization/ptq/main.py (79%) rename 
examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/mobilenet_v2/quantization/ptq/prepare_model.py (100%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/mobilenet_v2/quantization/ptq/run_benchmark.sh (100%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/mobilenet_v2/quantization/ptq/run_tuning.sh (100%) create mode 100644 examples/keras/image_recognition/prepare_dataset.sh rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/resnetv2_101/quantization/ptq/README.md (100%) rename examples/{tensorflow/image_recognition/keras_models/xception => keras/image_recognition/resnetv2_101}/quantization/ptq/main.py (73%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/resnetv2_101/quantization/ptq/prepare_model.py (100%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/resnetv2_101/quantization/ptq/run_benchmark.sh (100%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/resnetv2_101/quantization/ptq/run_tuning.sh (100%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/resnetv2_50/quantization/ptq/README.md (51%) rename examples/{tensorflow/image_recognition/keras_models/resnetv2_101 => keras/image_recognition/resnetv2_50}/quantization/ptq/main.py (56%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/resnetv2_50/quantization/ptq/prepare_model.py (100%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/resnetv2_50/quantization/ptq/run_benchmark.sh (100%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/resnetv2_50/quantization/ptq/run_tuning.sh (100%) create mode 100644 examples/keras/image_recognition/xception/quantization/ptq/README.md rename examples/{tensorflow/image_recognition/keras_models/mobilenet_v2 => keras/image_recognition/xception}/quantization/ptq/main.py (50%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/xception/quantization/ptq/prepare_model.py (100%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/xception/quantization/ptq/run_benchmark.sh (100%) rename examples/{tensorflow/image_recognition/keras_models => keras/image_recognition}/xception/quantization/ptq/run_tuning.sh (100%) delete mode 100644 examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/README.md create mode 100644 neural_compressor/adaptor/keras_utils/conv2d.py create mode 100644 neural_compressor/adaptor/keras_utils/dense.py diff --git a/examples/.config/model_params_keras.json b/examples/.config/model_params_keras.json new file mode 100644 index 00000000000..cbfbd696a15 --- /dev/null +++ b/examples/.config/model_params_keras.json @@ -0,0 +1,39 @@ +{ + "keras": { + "resnetv2_50": { + "model_src_dir": "image_recognition/resnetv2_50/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset2/models/tensorflow/resnetv2_50_keras/saved_model", + "main_script": "main.py", + "batch_size": 32 + }, + "xception": { + "model_src_dir": "image_recognition/xception/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset2/models/tensorflow/xception_keras/saved_model/", + "main_script": "main.py", + "batch_size": 32 + }, + "mnist": { + 
"model_src_dir": "image_recognition/mnist/quantization/qat", + "dataset_location": "", + "input_model": "/tf_dataset2/models/tensorflow/mnist_keras/saved_model/", + "main_script": "main.py", + "batch_size": 32 + }, + "resnetv2_101": { + "model_src_dir": "image_recognition/resnetv2_101/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset2/models/tensorflow/resnetv2_101_keras/saved_model", + "main_script": "main.py", + "batch_size": 32 + }, + "mobilenet_v2": { + "model_src_dir": "image_recognition/mobilenet_v2/quantization/ptq", + "dataset_location": "/tf_dataset/dataset/imagenet", + "input_model": "/tf_dataset2/models/tensorflow/mobilenet_v2_keras/saved_model/", + "main_script": "main.py", + "batch_size": 32 + } + } +} diff --git a/examples/README.md b/examples/README.md index 169214c99b0..ce299bde789 100644 --- a/examples/README.md +++ b/examples/README.md @@ -61,7 +61,7 @@ Intel® Neural Compressor validated examples with multiple compression technique MobileNet V2 Image Recognition Post-Training Static Quantization - pb / keras + pb / keras MobileNet V3 @@ -115,13 +115,13 @@ Intel® Neural Compressor validated examples with multiple compression technique ResNet V2 50 Image Recognition Post-Training Static Quantization - pb / keras + pb / keras ResNet V2 101 Image Recognition Post-Training Static Quantization - pb / keras + pb / keras ResNet V2 152 @@ -193,7 +193,7 @@ Intel® Neural Compressor validated examples with multiple compression technique Xception Image Recognition Post-Training Static Quantization - keras + keras ResNet V2 diff --git a/examples/keras/image_recognition/imagenet_prepare/build_imagenet_data.py b/examples/keras/image_recognition/imagenet_prepare/build_imagenet_data.py new file mode 100644 index 00000000000..c52d2bd4218 --- /dev/null +++ b/examples/keras/image_recognition/imagenet_prepare/build_imagenet_data.py @@ -0,0 +1,567 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Converts ImageNet data to TFRecords file format with Example protos. + +The raw ImageNet data set is expected to reside in JPEG files located in the +following directory structure. + + data_dir/n01440764/ILSVRC2012_val_00000293.JPEG + data_dir/n01440764/ILSVRC2012_val_00000543.JPEG + ... + +where 'n01440764' is the unique synset label associated with +these images. + +The training data set consists of 1000 sub-directories (i.e. labels) +each containing 1200 JPEG images for a total of 1.2M JPEG images. + +The evaluation data set consists of 1000 sub-directories (i.e. labels) +each containing 50 JPEG images for a total of 50K JPEG images. + +This TensorFlow script converts the training and evaluation data into +a sharded data set consisting of 1024 and 128 TFRecord files, respectively. + + train_directory/train-00000-of-01024 + train_directory/train-00001-of-01024 + ... 
+ train_directory/train-00127-of-01024 + +and + + validation_directory/validation-00000-of-00128 + validation_directory/validation-00001-of-00128 + ... + validation_directory/validation-00127-of-00128 + +Each validation TFRecord file contains ~390 records. Each training TFREcord +file contains ~1250 records. Each record within the TFRecord file is a +serialized Example proto. The Example proto contains the following fields: + + image/encoded: string containing JPEG encoded image in RGB colorspace + image/height: integer, image height in pixels + image/width: integer, image width in pixels + image/colorspace: string, specifying the colorspace, always 'RGB' + image/channels: integer, specifying the number of channels, always 3 + image/format: string, specifying the format, always'JPEG' + + image/filename: string containing the basename of the image file + e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG' + image/class/label: integer specifying the index in a classification layer. + The label ranges from [1, 1000] where 0 is not used. + image/class/synset: string specifying the unique ID of the label, + e.g. 'n01440764' + image/class/text: string specifying the human-readable version of the label + e.g. 'red fox, Vulpes vulpes' + +Note that the length of xmin is identical to the length of xmax, ymin and ymax +for each example. + +Running this script using 16 threads may take around ~2.5 hours on a HP Z420. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from datetime import datetime +import os +import random +import sys +import threading + +import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin +import tensorflow as tf +tf.compat.v1.disable_eager_execution() + + +tf.compat.v1.app.flags.DEFINE_string('raw_directory', None, + 'Raw data directory') + +tf.compat.v1.app.flags.DEFINE_string('output_directory', None, + 'Output data directory') + +tf.compat.v1.app.flags.DEFINE_integer('shards', 1, + 'Number of shards in TFRecord files.') + +tf.compat.v1.app.flags.DEFINE_string('subset', 'validation', + 'Subset of imagenet, can be validation/train') + +tf.compat.v1.app.flags.DEFINE_integer('num_threads', 1, + 'Number of threads to preprocess the images.') + +# The labels file contains a list of valid labels are held in this file. +# Assumes that the file contains entries as such: +# n01440764 +# n01443537 +# n01484850 +# where each line corresponds to a label expressed as a synset. We map +# each synset contained in the file to an integer (based on the alphabetical +# ordering). See below for details. +tf.compat.v1.app.flags.DEFINE_string('labels_file', + 'imagenet_lsvrc_2015_synsets.txt', + 'Labels file') + +# This file containing mapping from synset to human-readable label. +# Assumes each line of the file looks like: +# +# n02119247 black fox +# n02119359 silver fox +# n02119477 red fox, Vulpes fulva +# +# where each line corresponds to a unique mapping. Note that each line is +# formatted as \t. 
+tf.compat.v1.app.flags.DEFINE_string('imagenet_metadata_file', + 'imagenet_metadata.txt', + 'ImageNet metadata file') + +FLAGS = tf.compat.v1.app.flags.FLAGS + + +def _int64_feature(value): + """Wrapper for inserting int64 features into Example proto.""" + if not isinstance(value, list): + value = [value] + return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) + + +def _float_feature(value): + """Wrapper for inserting float features into Example proto.""" + if not isinstance(value, list): + value = [value] + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + + +def _bytes_feature(value): + """Wrapper for inserting bytes features into Example proto.""" + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _convert_to_example(filename, image_buffer, label, synset, human, + height, width): + """Build an Example proto for an example. + + Args: + filename: string, path to an image file, e.g., '/path/to/example.JPG' + image_buffer: string, JPEG encoding of RGB image + label: integer, identifier for the ground truth for the network + synset: string, unique WordNet ID specifying the label, e.g., 'n02323233' + human: string, human-readable label, e.g., 'red fox, Vulpes vulpes' + height: integer, image height in pixels + width: integer, image width in pixels + Returns: + Example proto + """ + + colorspace = b'RGB' + channels = 3 + image_format = b'JPEG' + + example = tf.train.Example(features=tf.train.Features(feature={ + 'image/height': _int64_feature(height), + 'image/width': _int64_feature(width), + 'image/colorspace': _bytes_feature(colorspace), + 'image/channels': _int64_feature(channels), + 'image/class/label': _int64_feature(label), + 'image/class/synset': _bytes_feature(bytes(synset,'utf-8')), + 'image/class/text': _bytes_feature(bytes(human,'utf-8')), + 'image/format': _bytes_feature(image_format), + 'image/filename': _bytes_feature(bytes(os.path.basename(filename),'utf-8')), + 'image/encoded': _bytes_feature(image_buffer)})) + return example + + +class ImageCoder(object): + """Helper class that provides TensorFlow image coding utilities.""" + + def __init__(self): + # Create a single Session to run all image coding calls. + self._sess = tf.compat.v1.Session() + + # Initializes function that converts PNG to JPEG data. + self._png_data = tf.compat.v1.placeholder(dtype=tf.string) + image = tf.image.decode_png(self._png_data, channels=3) + self._png_to_jpeg = tf.image.encode_jpeg(image, format='rgb', quality=100) + + # Initializes function that converts CMYK JPEG data to RGB JPEG data. + self._cmyk_data = tf.compat.v1.placeholder(dtype=tf.string) + image = tf.image.decode_jpeg(self._cmyk_data, channels=0) + self._cmyk_to_rgb = tf.image.encode_jpeg(image, format='rgb', quality=100) + + # Initializes function that decodes RGB JPEG data. + self._decode_jpeg_data = tf.compat.v1.placeholder(dtype=tf.string) + self._decode_jpeg = tf.image.decode_jpeg(self._decode_jpeg_data, channels=3) + + def png_to_jpeg(self, image_data): + return self._sess.run(self._png_to_jpeg, + feed_dict={self._png_data: image_data}) + + def cmyk_to_rgb(self, image_data): + return self._sess.run(self._cmyk_to_rgb, + feed_dict={self._cmyk_data: image_data}) + + def decode_jpeg(self, image_data): + image = self._sess.run(self._decode_jpeg, + feed_dict={self._decode_jpeg_data: image_data}) + assert len(image.shape) == 3 + assert image.shape[2] == 3 + return image + + +def _is_png(filename): + """Determine if a file contains a PNG format image. 
+ + Args: + filename: string, path of the image file. + + Returns: + boolean indicating if the image is a PNG. + """ + # File list from: + # https://groups.google.com/forum/embed/?place=forum/torch7#!topic/torch7/fOSTXHIESSU + return 'n02105855_2933.JPEG' in filename + + +def _is_cmyk(filename): + """Determine if file contains a CMYK JPEG format image. + + Args: + filename: string, path of the image file. + + Returns: + boolean indicating if the image is a JPEG encoded with CMYK color space. + """ + # File list from: + # https://github.com/cytsai/ilsvrc-cmyk-image-list + blacklist = ['n01739381_1309.JPEG', 'n02077923_14822.JPEG', + 'n02447366_23489.JPEG', 'n02492035_15739.JPEG', + 'n02747177_10752.JPEG', 'n03018349_4028.JPEG', + 'n03062245_4620.JPEG', 'n03347037_9675.JPEG', + 'n03467068_12171.JPEG', 'n03529860_11437.JPEG', + 'n03544143_17228.JPEG', 'n03633091_5218.JPEG', + 'n03710637_5125.JPEG', 'n03961711_5286.JPEG', + 'n04033995_2932.JPEG', 'n04258138_17003.JPEG', + 'n04264628_27969.JPEG', 'n04336792_7448.JPEG', + 'n04371774_5854.JPEG', 'n04596742_4225.JPEG', + 'n07583066_647.JPEG', 'n13037406_4650.JPEG'] + return filename.split('/')[-1] in blacklist + + +def _process_image(filename, coder): + """Process a single image file. + + Args: + filename: string, path to an image file e.g., '/path/to/example.JPG'. + coder: instance of ImageCoder to provide TensorFlow image coding utils. + Returns: + image_buffer: string, JPEG encoding of RGB image. + height: integer, image height in pixels. + width: integer, image width in pixels. + """ + # Read the image file. + image_data = tf.io.gfile.GFile(filename, 'rb').read() + + # Clean the dirty data. + if _is_png(filename): + # 1 image is a PNG. + print('Converting PNG to JPEG for %s' % filename) + image_data = coder.png_to_jpeg(image_data) + elif _is_cmyk(filename): + # 22 JPEG images are in CMYK colorspace. + print('Converting CMYK to RGB for %s' % filename) + image_data = coder.cmyk_to_rgb(image_data) + + # Decode the RGB JPEG. + image = coder.decode_jpeg(image_data) + + # Check that image converted to RGB + assert len(image.shape) == 3 + height = image.shape[0] + width = image.shape[1] + assert image.shape[2] == 3 + + return image_data, height, width + + +def _process_image_files_batch(coder, thread_index, ranges, name, filenames, + synsets, labels, humans, num_shards): + """Processes and saves list of images as TFRecord in 1 thread. + + Args: + coder: instance of ImageCoder to provide TensorFlow image coding utils. + thread_index: integer, unique batch to run index is within [0, len(ranges)). + ranges: list of pairs of integers specifying ranges of each batches to + analyze in parallel. + name: string, unique identifier specifying the data set + filenames: list of strings; each string is a path to an image file + synsets: list of strings; each string is a unique WordNet ID + labels: list of integer; each integer identifies the ground truth + humans: list of strings; each string is a human-readable label + num_shards: integer number of shards for this data set. + """ + # Each thread produces N shards where N = int(num_shards / num_threads). + # For instance, if num_shards = 128, and the num_threads = 2, then the first + # thread would produce shards [0, 64). 
+ num_threads = len(ranges) + assert not num_shards % num_threads + num_shards_per_batch = int(num_shards / num_threads) + + shard_ranges = np.linspace(ranges[thread_index][0], + ranges[thread_index][1], + num_shards_per_batch + 1).astype(int) + num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0] + + counter = 0 + for s in xrange(num_shards_per_batch): + # Generate a sharded version of the file name, e.g. 'train-00002-of-00010' + shard = thread_index * num_shards_per_batch + s + output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards) + output_file = os.path.join(FLAGS.output_directory, output_filename) + writer = tf.io.TFRecordWriter(output_file) + + shard_counter = 0 + files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int) # HERE + for i in files_in_shard: + filename = filenames[i] + label = labels[i] + synset = synsets[i] + human = humans[i] + + image_buffer, height, width = _process_image(filename, coder) + + example = _convert_to_example(filename, image_buffer, label, synset, human, height, width) + writer.write(example.SerializeToString()) + shard_counter += 1 + counter += 1 + + if not counter % 1000: + print('%s [thread %d]: Processed %d of %d images in thread batch.' % + (datetime.now(), thread_index, counter, num_files_in_thread)) + sys.stdout.flush() + + writer.close() + print('%s [thread %d]: Wrote %d images to %s' % + (datetime.now(), thread_index, shard_counter, output_file)) + sys.stdout.flush() + shard_counter = 0 + print('%s [thread %d]: Wrote %d images to %d shards.' % + (datetime.now(), thread_index, counter, num_files_in_thread)) + sys.stdout.flush() + + +def _process_image_files(name, filenames, synsets, labels, humans, num_shards): + """Process and save list of images as TFRecord of Example protos. + + Args: + name: string, unique identifier specifying the data set + filenames: list of strings; each string is a path to an image file + synsets: list of strings; each string is a unique WordNet ID + labels: list of integer; each integer identifies the ground truth + humans: list of strings; each string is a human-readable label + num_shards: integer number of shards for this data set. + """ + assert len(filenames) == len(synsets) + assert len(filenames) == len(labels) + assert len(filenames) == len(humans) + + # Break all images into batches with a [ranges[i][0], ranges[i][1]]. + spacing = np.linspace(0, len(filenames), FLAGS.num_threads + 1).astype(np.int) + ranges = [] + threads = [] + for i in xrange(len(spacing) - 1): + ranges.append([spacing[i], spacing[i+1]]) + + # Launch a thread for each batch. + print('Launching %d threads for spacings: %s' % (FLAGS.num_threads, ranges)) + sys.stdout.flush() + + # Create a mechanism for monitoring when all threads are finished. + coord = tf.train.Coordinator() + + # Create a generic TensorFlow-based utility for converting all image codings. + coder = ImageCoder() + + threads = [] + for thread_index in xrange(len(ranges)): + args = (coder, thread_index, ranges, name, filenames, + synsets, labels, humans, num_shards) + t = threading.Thread(target=_process_image_files_batch, args=args) + t.start() + threads.append(t) + + # Wait for all the threads to terminate. + coord.join(threads) + print('%s: Finished writing all %d images in data set.' % + (datetime.now(), len(filenames))) + sys.stdout.flush() + + +def _find_image_files(data_dir, labels_file): + """Build a list of all images files and labels in the data set. + + Args: + data_dir: string, path to the root directory of images. 
+ + Assumes that the ImageNet data set resides in JPEG files located in + the following directory structure. + + data_dir/n01440764/ILSVRC2012_val_00000293.JPEG + data_dir/n01440764/ILSVRC2012_val_00000543.JPEG + + where 'n01440764' is the unique synset label associated with these images. + + labels_file: string, path to the labels file. + + The list of valid labels are held in this file. Assumes that the file + contains entries as such: + n01440764 + n01443537 + n01484850 + where each line corresponds to a label expressed as a synset. We map + each synset contained in the file to an integer (based on the alphabetical + ordering) starting with the integer 1 corresponding to the synset + contained in the first line. + + The reason we start the integer labels at 1 is to reserve label 0 as an + unused background class. + + Returns: + filenames: list of strings; each string is a path to an image file. + synsets: list of strings; each string is a unique WordNet ID. + labels: list of integer; each integer identifies the ground truth. + """ + print('Determining list of input files and labels from %s.' % data_dir) + challenge_synsets = [l.strip() for l in + tf.compat.v1.gfile.FastGFile(labels_file, 'r').readlines()] + + labels = [] + filenames = [] + synsets = [] + + # Leave label index 0 empty as a background class. + label_index = 1 + + # Construct the list of JPEG files and labels. + for synset in challenge_synsets: + jpeg_file_path = '%s/%s/*.JPEG' % (data_dir, synset) + matching_files = tf.io.gfile.glob(jpeg_file_path) + + labels.extend([label_index] * len(matching_files)) + synsets.extend([synset] * len(matching_files)) + filenames.extend(matching_files) + + if not label_index % 100: + print('Finished finding files in %d of %d classes.' % ( + label_index, len(challenge_synsets))) + label_index += 1 + + # Shuffle the ordering of all image files in order to guarantee + # random ordering of the images with respect to label in the + # saved TFRecord files. Make the randomization repeatable. + shuffled_index = range(len(filenames)) + random.seed(12345) + + random.shuffle(list(range(len(shuffled_index)))) + + filenames = [filenames[i] for i in shuffled_index] + synsets = [synsets[i] for i in shuffled_index] + labels = [labels[i] for i in shuffled_index] + + print('Found %d JPEG files across %d labels inside %s.' % + (len(filenames), len(challenge_synsets), data_dir)) + return filenames, synsets, labels + + +def _find_human_readable_labels(synsets, synset_to_human): + """Build a list of human-readable labels. + + Args: + synsets: list of strings; each string is a unique WordNet ID. + synset_to_human: dict of synset to human labels, e.g., + 'n02119022' --> 'red fox, Vulpes vulpes' + + Returns: + List of human-readable strings corresponding to each synset. + """ + humans = [] + for s in synsets: + assert s in synset_to_human, ('Failed to find: %s' % s) + humans.append(synset_to_human[s]) + return humans + + +def _process_dataset(name, directory, num_shards, synset_to_human): + """Process a complete data set and save it as a TFRecord. + + Args: + name: string, unique identifier specifying the data set. + directory: string, root path to the data set. + num_shards: integer number of shards for this data set. 
+ synset_to_human: dict of synset to human labels, e.g., + 'n02119022' --> 'red fox, Vulpes vulpes' + """ + filenames, synsets, labels = _find_image_files(directory, FLAGS.labels_file) + humans = _find_human_readable_labels(synsets, synset_to_human) + + _process_image_files(name, filenames, synsets, labels, + humans, num_shards) + + +def _build_synset_lookup(imagenet_metadata_file): + """Build lookup for synset to human-readable label. + + Args: + imagenet_metadata_file: string, path to file containing mapping from + synset to human-readable label. + + Assumes each line of the file looks like: + + n02119247 black fox + n02119359 silver fox + n02119477 red fox, Vulpes fulva + + where each line corresponds to a unique mapping. Note that each line is + formatted as \t. + + Returns: + Dictionary of synset to human labels, such as: + 'n02119022' --> 'red fox, Vulpes vulpes' + """ + lines = tf.compat.v1.gfile.FastGFile(imagenet_metadata_file, 'r').readlines() + synset_to_human = {} + for l in lines: + if l: + parts = l.strip().split('\t') + assert len(parts) == 2 + synset = parts[0] + human = parts[1] + synset_to_human[synset] = human + return synset_to_human + + +def main(unused_argv): + assert not FLAGS.shards % FLAGS.num_threads, ( + 'Please make the FLAGS.num_threads commensurate with FLAGS.shards') + + print('Saving results to %s' % FLAGS.output_directory) + + # Build a map from synset to human-readable label. + synset_to_human = _build_synset_lookup(FLAGS.imagenet_metadata_file) + + if(FLAGS.raw_directory != None): + _process_dataset(FLAGS.subset, FLAGS.raw_directory,FLAGS.shards, synset_to_human) + +if __name__ == '__main__': + tf.compat.v1.app.run() diff --git a/examples/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh b/examples/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh new file mode 100644 index 00000000000..f9baa85ab07 --- /dev/null +++ b/examples/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Script to download and preprocess ImageNet Challenge 2012 +# training and validation data set. +# +# The final output of this script are sharded TFRecord files containing +# serialized Example protocol buffers. See build_imagenet_data.py for +# details of how the Example protocol buffers contain the ImageNet data. +# +# The final output of this script appears as such: +# +# data_dir/train-00000-of-01024 +# data_dir/train-00001-of-01024 +# ... +# data_dir/train-00127-of-01024 +# +# and +# +# data_dir/validation-00000-of-00128 +# data_dir/validation-00001-of-00128 +# ... +# data_dir/validation-00127-of-00128 +# +# Note that this script may take several hours to run to completion. The +# conversion of the ImageNet data to TFRecords alone takes 2-3 hours depending +# on the speed of your machine. 
Please be patient. +# +# **IMPORTANT** +# To download the raw images, the user must create an account with image-net.org +# and generate a username and access_key. The latter two are required for +# downloading the raw images. +# + +set -e + +if [ -z "$1" ]; then + echo "usage download_and_convert_imagenet.sh [data dir]" + exit +fi + +# Create the output and temporary directories. +DATA_DIR="${1%/}" +SCRATCH_DIR="${DATA_DIR}/raw-data/" +mkdir -p "${DATA_DIR}" +mkdir -p "${SCRATCH_DIR}" +WORK_DIR="$0.runfiles/__main__" + +# Download the ImageNet data. +LABELS_FILE="${WORK_DIR}/datasets/imagenet_lsvrc_2015_synsets.txt" +DOWNLOAD_SCRIPT="${WORK_DIR}/datasets/download_imagenet.sh" +"${DOWNLOAD_SCRIPT}" "${SCRATCH_DIR}" "${LABELS_FILE}" + +# Note the locations of the train and validation data. +TRAIN_DIRECTORY="${SCRATCH_DIR}train/" +VALIDATION_DIRECTORY="${SCRATCH_DIR}validation/" + +# Preprocess the validation data by moving the images into the appropriate +# sub-directory based on the label (synset) of the image. +echo "Organizing the validation data into sub-directories." +PREPROCESS_VAL_SCRIPT="${WORK_DIR}/datasets/preprocess_imagenet_validation_data.py" +VAL_LABELS_FILE="${WORK_DIR}/datasets/imagenet_2012_validation_synset_labels.txt" + +"${PREPROCESS_VAL_SCRIPT}" "${VALIDATION_DIRECTORY}" "${VAL_LABELS_FILE}" + +# Convert the XML files for bounding box annotations into a single CSV. +echo "Extracting bounding box information from XML." +BOUNDING_BOX_SCRIPT="${WORK_DIR}/datasets/process_bounding_boxes.py" +BOUNDING_BOX_FILE="${SCRATCH_DIR}/imagenet_2012_bounding_boxes.csv" +BOUNDING_BOX_DIR="${SCRATCH_DIR}bounding_boxes/" + +"${BOUNDING_BOX_SCRIPT}" "${BOUNDING_BOX_DIR}" "${LABELS_FILE}" \ + | sort >"${BOUNDING_BOX_FILE}" +echo "Finished downloading and preprocessing the ImageNet data." + +# Build the TFRecords version of the ImageNet data. +BUILD_SCRIPT="${WORK_DIR}/build_imagenet_data" +OUTPUT_DIRECTORY="${DATA_DIR}" +IMAGENET_METADATA_FILE="${WORK_DIR}/datasets/imagenet_metadata.txt" + +"${BUILD_SCRIPT}" \ + --train_directory="${TRAIN_DIRECTORY}" \ + --validation_directory="${VALIDATION_DIRECTORY}" \ + --output_directory="${OUTPUT_DIRECTORY}" \ + --imagenet_metadata_file="${IMAGENET_METADATA_FILE}" \ + --labels_file="${LABELS_FILE}" \ + --bounding_box_file="${BOUNDING_BOX_FILE}" diff --git a/examples/keras/image_recognition/imagenet_prepare/download_imagenet.sh b/examples/keras/image_recognition/imagenet_prepare/download_imagenet.sh new file mode 100644 index 00000000000..c780e179f93 --- /dev/null +++ b/examples/keras/image_recognition/imagenet_prepare/download_imagenet.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +# Script to download ImageNet Challenge 2012 training and validation data set. +# +# Downloads and decompresses raw images and bounding boxes. 
+# +# **IMPORTANT** +# To download the raw images, the user must create an account with image-net.org +# and generate a username and access_key. The latter two are required for +# downloading the raw images. +# +# usage: +# ./download_imagenet.sh [dirname] +set -e + +if [ "x$IMAGENET_ACCESS_KEY" == x -o "x$IMAGENET_USERNAME" == x ]; then + cat <= iteration: @@ -122,9 +117,9 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils import set_random_seed + from neural_compressor.utils.utility import set_random_seed set_random_seed(9527) - config = PostTrainingQuantConfig( + config = PostTrainingQuantConfig(backend='itex', calibration_sampling_size=[50, 100]) q_model = fit( model=FLAGS.input_model, @@ -138,10 +133,10 @@ def main(_): from neural_compressor.benchmark import fit from neural_compressor.config import BenchmarkConfig if FLAGS.mode == 'performance': - conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) + conf = BenchmarkConfig(backend='itex', cores_per_instance=4, num_of_instance=7) fit(FLAGS.input_model, conf, b_func=evaluate) else: - from neural_compressor.model import Model + from neural_compressor.model.model import Model accuracy = evaluate(Model(FLAGS.input_model).model) logger.info('Batch size = %d' % FLAGS.batch_size) logger.info("Accuracy: %.5f" % accuracy) diff --git a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/prepare_model.py b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/prepare_model.py similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/prepare_model.py rename to examples/keras/image_recognition/mobilenet_v2/quantization/ptq/prepare_model.py diff --git a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_benchmark.sh rename to examples/keras/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_tuning.sh b/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/run_tuning.sh similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/run_tuning.sh rename to examples/keras/image_recognition/mobilenet_v2/quantization/ptq/run_tuning.sh diff --git a/examples/keras/image_recognition/prepare_dataset.sh b/examples/keras/image_recognition/prepare_dataset.sh new file mode 100644 index 00000000000..4aad5d69a3f --- /dev/null +++ b/examples/keras/image_recognition/prepare_dataset.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# set -x + +OUTPUT_DIR="./data" +SUBSET="validation" +SHARDS=1 + +help() +{ + cat <<- EOF + Desc: Convert prepared raw imagnet dataset to tfrecord + -h --help help info + --output_dir Output data directory + default: './data' + --raw_dir Raw data directory + --shards Number of shards in TFRecord files. + default: '1' + --subset Subset of imagenet, can be validation/train. 
+ default: 'validation' +EOF + exit 0 +} + +function main { + init_params "$@" + convert_dataset +} + +# init params +function init_params { + for var in "$@" + do + case $var in + --output_dir=*) + OUTPUT_DIR=$(echo $var |cut -f2 -d=) + ;; + --raw_dir=*) + RAW_DIR=$(echo $var |cut -f2 -d=) + ;; + --shards=*) + SHARDS=$(echo $var |cut -f2 -d=) + ;; + --subset=*) + SUBSET=$(echo $var |cut -f2 -d=) + ;; + -h|--help) help + ;; + *) + echo "Error: No such parameter: ${var}" + exit 1 + ;; + esac + done +} + +# convert dataset +function convert_dataset { + if [ ! -d ${OUTPUT_DIR} ]; then + mkdir ${OUTPUT_DIR} + fi + python imagenet_prepare/build_imagenet_data.py \ + --imagenet_metadata_file "imagenet_prepare/imagenet_metadata.txt" \ + --labels_file "imagenet_prepare/imagenet_lsvrc_2015_synsets.txt" \ + --output_directory ${OUTPUT_DIR} \ + --subset ${SUBSET} \ + --raw_directory ${RAW_DIR} \ + --shards ${SHARDS} +} + +main "$@" + diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/README.md b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/README.md rename to examples/keras/image_recognition/resnetv2_101/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py similarity index 73% rename from examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/main.py rename to examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py index 2f9d50abcbe..11f1d660433 100644 --- a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py @@ -51,23 +51,23 @@ flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric import TensorflowTopK -from neural_compressor.data import ComposeTransform -from neural_compressor.data import TensorflowImageRecord -from neural_compressor.data import LabelShift -from neural_compressor.data import DefaultDataLoader -from neural_compressor.data import BilinearImagenetTransform - +from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor.data.transforms.transform import ComposeTransform +from neural_compressor.data.datasets.dataset import TensorflowImageRecord +from neural_compressor.data.transforms.imagenet_transform import LabelShift +from neural_compressor.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader +from neural_compressor.data.transforms.imagenet_transform import BilinearImagenetTransform + +height = width = 224 eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=299, width=299)])) - -eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + [BilinearImagenetTransform(height=height, width=width)])) +eval_dataloader = TensorflowDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) if FLAGS.calib_data: calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform= \ - ComposeTransform(transform_list= [BilinearImagenetTransform(height=299, width=299)])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + ComposeTransform(transform_list= 
[BilinearImagenetTransform(height=height, width=width)])) + calib_dataloader = TensorflowDataLoader(dataset=calib_dataset, batch_size=10) def evaluate(model): """ @@ -80,9 +80,6 @@ def evaluate(model): Returns: accuracy (float): evaluation result, the larger is better. """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] postprocess = LabelShift(label_shift=1) metric = TensorflowTopK(k=1) latency_list = [] @@ -90,17 +87,13 @@ def evaluate(model): def eval_func(dataloader, metric): warmup = 5 iteration = None - if FLAGS.benchmark and FLAGS.mode == 'performance': iteration = FLAGS.iters for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) start = time.time() - predictions = infer(input_tensor)[output_name] + predictions = model.predict_on_batch(inputs) end = time.time() latency_list.append(end - start) - predictions = predictions.numpy() predictions, labels = postprocess((predictions, labels)) metric.update(predictions, labels) if iteration and idx >= iteration: @@ -114,9 +107,9 @@ def eval_func(dataloader, metric): for i, res in enumerate(latency_list): logger.debug("Iteration {} result {}:".format(i, res)) if FLAGS.benchmark and FLAGS.mode == 'performance': - print("Batch size = {}".format(eval_dataloader.batch_size)) - print("Latency: {:.3f} ms".format(latency * 1000)) - print("Throughput: {:.3f} images/sec".format(1. / latency)) + logger.info("Batch size = {}".format(eval_dataloader.batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) acc = metric.result() return acc @@ -124,9 +117,10 @@ def main(_): if FLAGS.tune: from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils import set_random_seed + from neural_compressor.utils.utility import set_random_seed set_random_seed(9527) - config = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + config = PostTrainingQuantConfig(backend='itex', + calibration_sampling_size=[50, 100]) q_model = fit( model=FLAGS.input_model, conf=config, @@ -139,13 +133,13 @@ def main(_): from neural_compressor.benchmark import fit from neural_compressor.config import BenchmarkConfig if FLAGS.mode == 'performance': - conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) + conf = BenchmarkConfig(backend='itex', cores_per_instance=4, num_of_instance=7) fit(FLAGS.input_model, conf, b_func=evaluate) else: - from neural_compressor.model import Model + from neural_compressor.model.model import Model accuracy = evaluate(Model(FLAGS.input_model).model) - print('Batch size = %d' % FLAGS.batch_size) - print("Accuracy: %.5f" % accuracy) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/prepare_model.py b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/prepare_model.py similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/prepare_model.py rename to examples/keras/image_recognition/resnetv2_101/quantization/ptq/prepare_model.py diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_benchmark.sh 
b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_benchmark.sh rename to examples/keras/image_recognition/resnetv2_101/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_tuning.sh b/examples/keras/image_recognition/resnetv2_101/quantization/ptq/run_tuning.sh similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/run_tuning.sh rename to examples/keras/image_recognition/resnetv2_101/quantization/ptq/run_tuning.sh diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/README.md b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/README.md similarity index 51% rename from examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/README.md rename to examples/keras/image_recognition/resnetv2_50/quantization/ptq/README.md index 0af9fa2c7fd..8244c08abd9 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/README.md +++ b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/README.md @@ -5,22 +5,20 @@ This document is used to enable Tensorflow Keras models using Intel® Neural Com This example can run on Intel CPUs and GPUs. -# Prerequisite +## Prerequisite -## 1. Environment - -### Installation +### 1. Installation ```shell # Install Intel® Neural Compressor pip install neural-compressor ``` -### Install Intel Tensorflow +### 2. Install Intel Tensorflow ```shell pip install intel-tensorflow ``` > Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). -### Install Intel Extension for Tensorflow +### 3. Install Intel Extension for Tensorflow #### Quantizing the model on Intel GPU Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. @@ -37,30 +35,14 @@ Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's no pip install --upgrade intel-extension-for-tensorflow[cpu] ``` -## 2. Prepare Pretrained model +### 4. Prepare Pretrained model The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: ``` -python prepare_model.py --output_model=/path/to/model +python prepare_model.py --output_model=/path/to/model ``` `--output_model ` the model should be saved as SavedModel format or H5 format. -## 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. 
- - ```shell - cd examples/tensorflow/image_recognition/keras_models/ - # convert validation subset - bash prepare_dataset.sh --output_dir=/resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_dataset.sh --output_dir=/resnetv2_50/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd resnetv2_50/quantization/ptq - ``` - -# Run Command - ## Quantization Config The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. @@ -72,14 +54,9 @@ config = PostTrainingQuantConfig( ) ``` -## Quantization - ```shell - bash run_tuning.sh --input_model=./resnetv2_50_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset - ``` - -## Benchmark +## Run Command ```shell - bash run_benchmark.sh --input_model=./result --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=32 - bash run_benchmark.sh --input_model=./result --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 + bash run_tuning.sh --input_model=./path/to/model --output_model=./result --dataset_location=/path/to/evaluation/dataset --batch_size=32 + bash run_benchmark.sh --input_model=./path/to/model --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 ``` diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/main.py b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py similarity index 56% rename from examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/main.py rename to examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py index 79893199bfd..11f1d660433 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnetv2_101/quantization/ptq/main.py +++ b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2022 Intel Corporation +# Copyright (c) 2018 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,8 +17,8 @@ # import time import numpy as np -from neural_compressor import data import tensorflow as tf +from neural_compressor.utils import logger tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) flags = tf.compat.v1.flags @@ -46,98 +46,100 @@ flags.DEFINE_string( 'eval_data', None, 'location of evaluate dataset') -flags.DEFINE_integer( - 'batch_size', 32, 'batch_size of evaluation') +flags.DEFINE_integer('batch_size', 32, 'batch_size') flags.DEFINE_integer( 'iters', 100, 'maximum iteration when evaluating performance') -from neural_compressor.metric import TensorflowTopK -from neural_compressor.data import TensorflowImageRecord -from neural_compressor.data import DefaultDataLoader -from neural_compressor.data import ComposeTransform -from neural_compressor.data import LabelShift -from neural_compressor.data import BilinearImagenetTransform +from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor.data.transforms.transform import ComposeTransform +from neural_compressor.data.datasets.dataset import TensorflowImageRecord +from neural_compressor.data.transforms.imagenet_transform import LabelShift +from neural_compressor.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader +from neural_compressor.data.transforms.imagenet_transform import BilinearImagenetTransform +height = width = 224 eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=224, width=224)])) -if FLAGS.benchmark and FLAGS.mode == 'performance': - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) -else: - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + [BilinearImagenetTransform(height=height, width=width)])) + +eval_dataloader = TensorflowDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=224, width=224)])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform= \ + ComposeTransform(transform_list= [BilinearImagenetTransform(height=height, width=width)])) + calib_dataloader = TensorflowDataLoader(dataset=calib_dataset, batch_size=10) def evaluate(model): - """Custom evaluate function to inference the model for specified metric on validation dataset. + """ + Custom evaluate function to inference the model for specified metric on validation dataset. Args: model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - + measurer (object, optional): for benchmark measurement of duration. + Returns: accuracy (float): evaluation result, the larger is better. 
""" - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] postprocess = LabelShift(label_shift=1) metric = TensorflowTopK(k=1) + latency_list = [] def eval_func(dataloader, metric): warmup = 5 iteration = None - latency_list = [] if FLAGS.benchmark and FLAGS.mode == 'performance': iteration = FLAGS.iters for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) start = time.time() - predictions = infer(input_tensor)[output_name] + predictions = model.predict_on_batch(inputs) end = time.time() - predictions = predictions.numpy() + latency_list.append(end - start) predictions, labels = postprocess((predictions, labels)) metric.update(predictions, labels) - latency_list.append(end - start) if iteration and idx >= iteration: break latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size return latency latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark: + logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) + for i, res in enumerate(latency_list): + logger.debug("Iteration {} result {}:".format(i, res)) if FLAGS.benchmark and FLAGS.mode == 'performance': - print("Batch size = {}".format(eval_dataloader.batch_size)) - print("Latency: {:.3f} ms".format(latency * 1000)) - print("Throughput: {:.3f} images/sec".format(1. / latency)) + logger.info("Batch size = {}".format(eval_dataloader.batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) acc = metric.result() return acc def main(_): - from neural_compressor.utils import set_random_seed - set_random_seed(9527) if FLAGS.tune: - from neural_compressor import quantization + from neural_compressor.quantization import fit from neural_compressor.config import PostTrainingQuantConfig - - conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - q_model = quantization.fit(FLAGS.input_model, conf=conf, calib_dataloader=calib_dataloader, - eval_func=evaluate) + from neural_compressor.utils.utility import set_random_seed + set_random_seed(9527) + config = PostTrainingQuantConfig(backend='itex', + calibration_sampling_size=[50, 100]) + q_model = fit( + model=FLAGS.input_model, + conf=config, + calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, + eval_func=evaluate) q_model.save(FLAGS.output_model) if FLAGS.benchmark: from neural_compressor.benchmark import fit from neural_compressor.config import BenchmarkConfig if FLAGS.mode == 'performance': - conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) + conf = BenchmarkConfig(backend='itex', cores_per_instance=4, num_of_instance=7) fit(FLAGS.input_model, conf, b_func=evaluate) else: - from neural_compressor.model import Model - model = Model(FLAGS.input_model).model - accuracy = evaluate(model) - print('Batch size = %d' % FLAGS.batch_size) - print("Accuracy: %.5f" % accuracy) + from neural_compressor.model.model import Model + accuracy = evaluate(Model(FLAGS.input_model).model) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/prepare_model.py b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/prepare_model.py similarity index 100% rename from 
examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/prepare_model.py rename to examples/keras/image_recognition/resnetv2_50/quantization/ptq/prepare_model.py diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_benchmark.sh b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_benchmark.sh rename to examples/keras/image_recognition/resnetv2_50/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_tuning.sh b/examples/keras/image_recognition/resnetv2_50/quantization/ptq/run_tuning.sh similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/resnetv2_50/quantization/ptq/run_tuning.sh rename to examples/keras/image_recognition/resnetv2_50/quantization/ptq/run_tuning.sh diff --git a/examples/keras/image_recognition/xception/quantization/ptq/README.md b/examples/keras/image_recognition/xception/quantization/ptq/README.md new file mode 100644 index 00000000000..8244c08abd9 --- /dev/null +++ b/examples/keras/image_recognition/xception/quantization/ptq/README.md @@ -0,0 +1,62 @@ +Step-by-Step +============ + +This document is used to enable Tensorflow Keras models using Intel® Neural Compressor. +This example can run on Intel CPUs and GPUs. + + +## Prerequisite + +### 1. Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +### 2. Install Intel Tensorflow +```shell +pip install intel-tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### 3. Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[gpu] +``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). + +#### Quantizing the model on Intel CPU(Experimental) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +### 4. Prepare Pretrained model + +The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: + ``` +python prepare_model.py --output_model=/path/to/model + ``` +`--output_model ` the model should be saved as SavedModel format or H5 format. + +## Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... 
+ ) +``` + +## Run Command + ```shell + bash run_tuning.sh --input_model=./path/to/model --output_model=./result --dataset_location=/path/to/evaluation/dataset --batch_size=32 + bash run_benchmark.sh --input_model=./path/to/model --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 + ``` + diff --git a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/main.py b/examples/keras/image_recognition/xception/quantization/ptq/main.py similarity index 50% rename from examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/main.py rename to examples/keras/image_recognition/xception/quantization/ptq/main.py index 96b3d7a201b..5e7b7ffa4d9 100644 --- a/examples/tensorflow/image_recognition/keras_models/mobilenet_v2/quantization/ptq/main.py +++ b/examples/keras/image_recognition/xception/quantization/ptq/main.py @@ -1,155 +1,145 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import time -import numpy as np -from neural_compressor import data -import tensorflow as tf -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - -flags = tf.compat.v1.flags -FLAGS = flags.FLAGS - -## Required parameters -flags.DEFINE_string( - 'input_model', None, 'Run inference with specified keras model.') - -flags.DEFINE_string( - 'output_model', None, 'The output quantized model.') - -flags.DEFINE_string( - 'mode', 'performance', 'define benchmark mode for accuracy or performance') - -flags.DEFINE_bool( - 'tune', False, 'whether to tune the model') - -flags.DEFINE_bool( - 'benchmark', False, 'whether to benchmark the model') - -flags.DEFINE_string( - 'calib_data', None, 'location of calibration dataset') - -flags.DEFINE_string( - 'eval_data', None, 'location of evaluate dataset') - -flags.DEFINE_integer( - 'batch_size', 32, 'batch_size of evaluation') - -flags.DEFINE_integer( - 'iters', 100, 'maximum iteration when evaluating performance') - -from neural_compressor.metric import TensorflowTopK -from neural_compressor.data import TensorflowImageRecord -from neural_compressor.data import DefaultDataLoader -from neural_compressor.data import ComposeTransform -from neural_compressor.data import LabelShift -from neural_compressor.data import BilinearImagenetTransform - -eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=224, width=224)])) -if FLAGS.benchmark and FLAGS.mode == 'performance': - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) -else: - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) -if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=224, width=224)])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model): - """Custom evaluate function to 
inference the model for specified metric on validation dataset. - - Args: - model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - - Returns: - accuracy (float): evaluation result, the larger is better. - """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] - postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) - - def eval_func(dataloader, metric): - warmup = 5 - iteration = None - latency_list = [] - if FLAGS.benchmark and FLAGS.mode == 'performance': - iteration = FLAGS.iters - for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) - start = time.time() - predictions = infer(input_tensor)[output_name] - end = time.time() - predictions = predictions.numpy() - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - latency_list.append(end - start) - if iteration and idx >= iteration: - break - latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size - return latency - - latency = eval_func(eval_dataloader, metric) - if FLAGS.benchmark and FLAGS.mode == 'performance': - print("Batch size = {}".format(eval_dataloader.batch_size)) - print("Latency: {:.3f} ms".format(latency * 1000)) - print("Throughput: {:.3f} images/sec".format(1. / latency)) - acc = metric.result() - return acc - -def main(_): - from neural_compressor.utils import set_random_seed - set_random_seed(9527) - if FLAGS.tune: - from neural_compressor import quantization - from neural_compressor.config import PostTrainingQuantConfig - op_name_list={ - 'StatefulPartitionedCall/mobilenetv2_1.00_224/expanded_conv_depthwise/depthwise': - { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']}, - }, - 'StatefulPartitionedCall/mobilenetv2_1.00_224/expanded_conv_project_BN/FusedBatchNormV3/Mul': - { - 'activation': {'dtype': ['fp32']}, - 'weight': {'dtype': ['fp32']}, - } - } - conf = PostTrainingQuantConfig(calibration_sampling_size=[20, 50], - op_name_list=op_name_list) - q_model = quantization.fit(FLAGS.input_model, conf=conf, calib_dataloader=calib_dataloader, - eval_func=evaluate) - q_model.save(FLAGS.output_model) - - if FLAGS.benchmark: - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - if FLAGS.mode == 'performance': - conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) - fit(FLAGS.input_model, conf, b_func=evaluate) - else: - from neural_compressor.model import Model - model = Model(FLAGS.input_model).model - accuracy = evaluate(model) - print('Batch size = %d' % FLAGS.batch_size) - print("Accuracy: %.5f" % accuracy) - -if __name__ == "__main__": - tf.compat.v1.app.run() +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2018 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import time +import numpy as np +import tensorflow as tf +from neural_compressor.utils import logger +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + 'input_model', None, 'Run inference with specified keras model.') + +flags.DEFINE_string( + 'output_model', None, 'The output quantized model.') + +flags.DEFINE_string( + 'mode', 'performance', 'define benchmark mode for accuracy or performance') + +flags.DEFINE_bool( + 'tune', False, 'whether to tune the model') + +flags.DEFINE_bool( + 'benchmark', False, 'whether to benchmark the model') + +flags.DEFINE_string( + 'calib_data', None, 'location of calibration dataset') + +flags.DEFINE_string( + 'eval_data', None, 'location of evaluate dataset') + +flags.DEFINE_integer('batch_size', 32, 'batch_size') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +from neural_compressor.metric.metric import TensorflowTopK +from neural_compressor.data.transforms.transform import ComposeTransform +from neural_compressor.data.datasets.dataset import TensorflowImageRecord +from neural_compressor.data.transforms.imagenet_transform import LabelShift +from neural_compressor.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader +from neural_compressor.data.transforms.imagenet_transform import BilinearImagenetTransform + +height = width = 299 +eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=height, width=width)])) + +eval_dataloader = TensorflowDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + +if FLAGS.calib_data: + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform= \ + ComposeTransform(transform_list= [BilinearImagenetTransform(height=height, width=width)])) + calib_dataloader = TensorflowDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """ + Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). + measurer (object, optional): for benchmark measurement of duration. + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + latency_list = [] + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + start = time.time() + predictions = model.predict_on_batch(inputs) + end = time.time() + latency_list.append(end - start) + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark: + logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) + for i, res in enumerate(latency_list): + logger.debug("Iteration {} result {}:".format(i, res)) + if FLAGS.benchmark and FLAGS.mode == 'performance': + logger.info("Batch size = {}".format(eval_dataloader.batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + +def main(_): + if FLAGS.tune: + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils.utility import set_random_seed + set_random_seed(9527) + config = PostTrainingQuantConfig(backend='itex', + calibration_sampling_size=[50, 100]) + q_model = fit( + model=FLAGS.input_model, + conf=config, + calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, + eval_func=evaluate) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(backend='itex', cores_per_instance=4, num_of_instance=7) + fit(FLAGS.input_model, conf, b_func=evaluate) + else: + from neural_compressor.model.model import Model + accuracy = evaluate(Model(FLAGS.input_model).model) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/prepare_model.py b/examples/keras/image_recognition/xception/quantization/ptq/prepare_model.py similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/prepare_model.py rename to examples/keras/image_recognition/xception/quantization/ptq/prepare_model.py diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_benchmark.sh b/examples/keras/image_recognition/xception/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_benchmark.sh rename to examples/keras/image_recognition/xception/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_tuning.sh b/examples/keras/image_recognition/xception/quantization/ptq/run_tuning.sh similarity index 100% rename from examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/run_tuning.sh rename to examples/keras/image_recognition/xception/quantization/ptq/run_tuning.sh diff --git a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/README.md index 9ded8b119a7..3a91b58ec0d 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/README.md @@ -1,85 +1,85 @@ -Step-by-Step -============ - -This document is used to enable Tensorflow Keras model inception_resnet_v2 quantization and benchmark using Intel® Neural Compressor. -This example can run on Intel CPUs and GPUs. - - -# Prerequisite - -## 1. Environment - -### Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -### Install Intel Tensorflow -```shell -pip install intel-tensorflow -``` -> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). - -### Install Intel Extension for Tensorflow - -#### Quantizing the model on Intel GPU -Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. 
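
(For quick reference, the quantization flow that these migrated Keras examples now share can be condensed as in the sketch below. This is an illustration only, assuming the `calib_dataloader`, `eval_dataloader`, and `evaluate` objects are built exactly as in the `main.py` scripts in this patch; the model and output paths are hypothetical placeholders for the `FLAGS.input_model` / `FLAGS.output_model` values.)

```python
# Condensed sketch of the ITEX-backend post-training quantization flow used by
# the updated Keras examples (assumes calib_dataloader, eval_dataloader and
# evaluate are defined as in the example main.py).
from neural_compressor.quantization import fit
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.utils.utility import set_random_seed

set_random_seed(9527)  # same fixed seed the examples use for reproducible tuning
config = PostTrainingQuantConfig(
    backend='itex',                          # Keras models go through the itex backend
    calibration_sampling_size=[50, 100])     # calibration sizes used in these examples
q_model = fit(
    model='/path/to/keras/saved_model',      # placeholder for FLAGS.input_model
    conf=config,
    calib_dataloader=calib_dataloader,
    eval_dataloader=eval_dataloader,
    eval_func=evaluate)
q_model.save('/path/to/output_model')        # placeholder for FLAGS.output_model
```

As the README sections note, running the same flow on an Intel GPU only changes the config, e.g. `PostTrainingQuantConfig(device="gpu", backend="itex", ...)`.
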
- -```shell -pip install --upgrade intel-extension-for-tensorflow[gpu] -``` -Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). - -#### Quantizing the model on Intel CPU(Experimental) -Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2. Prepare Pretrained model - -The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: - ``` -python prepare_model.py --output_model=/path/to/model - ``` -`--output_model ` the model should be saved as SavedModel format or H5 format. - -## 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. - - ```shell - cd examples/tensorflow/image_recognition/keras_models/ - # convert validation subset - bash prepare_dataset.sh --output_dir=/inception_resnet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_dataset.sh --output_dir=/inception_resnet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd inception_resnet_v2/quantization/ptq - ``` - -# Run Command - -## Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... - ) -``` - -## Quantization - ```shell - bash run_tuning.sh --input_model=./inception_resnet_v2_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset - ``` - -## Benchmark - ```shell - bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=performance --batch_size=1 - bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=accuracy --batch_size=32 - ``` +Step-by-Step +============ + +This document is used to enable Tensorflow Keras model inception_resnet_v2 quantization and benchmark using Intel® Neural Compressor. +This example can run on Intel CPUs and GPUs. + + +# Prerequisite + +## 1. 
Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +### Install Intel Tensorflow +```shell +pip install intel-tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### Install Intel Extension for Tensorflow + +#### Quantizing the model on Intel GPU +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[gpu] +``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). + +#### Quantizing the model on Intel CPU(Experimental) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model + +The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: + ``` +python prepare_model.py --output_model=/path/to/model + ``` +`--output_model ` the model should be saved as SavedModel format or H5 format. + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/tensorflow/image_recognition/keras_models/ + # convert validation subset + bash prepare_dataset.sh --output_dir=/inception_resnet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=/inception_resnet_v2/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd inception_resnet_v2/quantization/ptq + ``` + +# Run Command + +## Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... 
+ ) +``` + +## Quantization + ```shell + bash run_tuning.sh --input_model=./inception_resnet_v2_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset + ``` + +## Benchmark + ```shell + bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=performance --batch_size=1 + bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=accuracy --batch_size=32 + ``` diff --git a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/main.py index 6160c05f576..bade6317218 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/main.py @@ -1,150 +1,150 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import time -import numpy as np -import tensorflow as tf -from neural_compressor.utils import logger -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - -flags = tf.compat.v1.flags -FLAGS = flags.FLAGS - -## Required parameters -flags.DEFINE_string( - 'input_model', None, 'Run inference with specified keras model.') - -flags.DEFINE_string( - 'output_model', None, 'The output quantized model.') - -flags.DEFINE_string( - 'mode', 'performance', 'define benchmark mode for accuracy or performance') - -flags.DEFINE_bool( - 'tune', False, 'whether to tune the model') - -flags.DEFINE_bool( - 'benchmark', False, 'whether to benchmark the model') - -flags.DEFINE_string( - 'calib_data', None, 'location of calibration dataset') - -flags.DEFINE_string( - 'eval_data', None, 'location of evaluate dataset') - -flags.DEFINE_integer('batch_size', 32, 'batch_size') - -flags.DEFINE_integer( - 'iters', 100, 'maximum iteration when evaluating performance') - -from neural_compressor.metric import TensorflowTopK -from neural_compressor.data import ComposeTransform -from neural_compressor.data import TensorflowImageRecord -from neural_compressor.data import LabelShift -from neural_compressor.data import DefaultDataLoader -from neural_compressor.data import BilinearImagenetTransform - -eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=299, width=299)])) - -eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) - -if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform= \ - ComposeTransform(transform_list= [BilinearImagenetTransform(height=299, width=299)])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model): - """ - Custom evaluate function to inference the model for specified metric on validation dataset. 
- - Args: - model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - measurer (object, optional): for benchmark measurement of duration. - - Returns: - accuracy (float): evaluation result, the larger is better. - """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] - postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) - latency_list = [] - - def eval_func(dataloader, metric): - warmup = 5 - iteration = None - - if FLAGS.benchmark and FLAGS.mode == 'performance': - iteration = FLAGS.iters - for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) - start = time.time() - predictions = infer(input_tensor)[output_name] - end = time.time() - latency_list.append(end - start) - predictions = predictions.numpy() - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - if iteration and idx >= iteration: - break - latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size - return latency - - latency = eval_func(eval_dataloader, metric) - if FLAGS.benchmark: - logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) - for i, res in enumerate(latency_list): - logger.debug("Iteration {} result {}:".format(i, res)) - if FLAGS.benchmark and FLAGS.mode == 'performance': - logger.info("Batch size = {}".format(eval_dataloader.batch_size)) - logger.info("Latency: {:.3f} ms".format(latency * 1000)) - logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) - acc = metric.result() - return acc - -def main(_): - if FLAGS.tune: - from neural_compressor.quantization import fit - from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor.utils import set_random_seed - set_random_seed(9527) - config = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - q_model = fit( - model=FLAGS.input_model, - conf=config, - calib_dataloader=calib_dataloader, - eval_dataloader=eval_dataloader, - eval_func=evaluate) - q_model.save(FLAGS.output_model) - - if FLAGS.benchmark: - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - if FLAGS.mode == 'performance': - conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) - fit(FLAGS.input_model, conf, b_func=evaluate) - else: - from neural_compressor.model import Model - accuracy = evaluate(Model(FLAGS.input_model).model) - logger.info('Batch size = %d' % FLAGS.batch_size) - logger.info("Accuracy: %.5f" % accuracy) - -if __name__ == "__main__": - tf.compat.v1.app.run() +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import time +import numpy as np +import tensorflow as tf +from neural_compressor.utils import logger +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + 'input_model', None, 'Run inference with specified keras model.') + +flags.DEFINE_string( + 'output_model', None, 'The output quantized model.') + +flags.DEFINE_string( + 'mode', 'performance', 'define benchmark mode for accuracy or performance') + +flags.DEFINE_bool( + 'tune', False, 'whether to tune the model') + +flags.DEFINE_bool( + 'benchmark', False, 'whether to benchmark the model') + +flags.DEFINE_string( + 'calib_data', None, 'location of calibration dataset') + +flags.DEFINE_string( + 'eval_data', None, 'location of evaluate dataset') + +flags.DEFINE_integer('batch_size', 32, 'batch_size') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +from neural_compressor.metric import TensorflowTopK +from neural_compressor.data import ComposeTransform +from neural_compressor.data import TensorflowImageRecord +from neural_compressor.data import LabelShift +from neural_compressor.data import DefaultDataLoader +from neural_compressor.data import BilinearImagenetTransform + +eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=299, width=299)])) + +eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) + +if FLAGS.calib_data: + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform= \ + ComposeTransform(transform_list= [BilinearImagenetTransform(height=299, width=299)])) + calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """ + Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). + measurer (object, optional): for benchmark measurement of duration. + + Returns: + accuracy (float): evaluation result, the larger is better. 
+ """ + infer = model.signatures["serving_default"] + output_dict_keys = infer.structured_outputs.keys() + output_name = list(output_dict_keys )[0] + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + latency_list = [] + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + inputs = np.array(inputs) + input_tensor = tf.constant(inputs) + start = time.time() + predictions = infer(input_tensor)[output_name] + end = time.time() + latency_list.append(end - start) + predictions = predictions.numpy() + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark: + logger.info("\n{} mode benchmark result:".format(FLAGS.mode)) + for i, res in enumerate(latency_list): + logger.debug("Iteration {} result {}:".format(i, res)) + if FLAGS.benchmark and FLAGS.mode == 'performance': + logger.info("Batch size = {}".format(eval_dataloader.batch_size)) + logger.info("Latency: {:.3f} ms".format(latency * 1000)) + logger.info("Throughput: {:.3f} images/sec".format(1. / latency)) + acc = metric.result() + return acc + +def main(_): + if FLAGS.tune: + from neural_compressor.quantization import fit + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.utils import set_random_seed + set_random_seed(9527) + config = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + q_model = fit( + model=FLAGS.input_model, + conf=config, + calib_dataloader=calib_dataloader, + eval_dataloader=eval_dataloader, + eval_func=evaluate) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(iteration=100, cores_per_instance=4, num_of_instance=7) + fit(FLAGS.input_model, conf, b_func=evaluate) + else: + from neural_compressor.model import Model + accuracy = evaluate(Model(FLAGS.input_model).model) + logger.info('Batch size = %d' % FLAGS.batch_size) + logger.info("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/prepare_model.py index f7ea841eef2..42441598eb8 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/prepare_model.py +++ b/examples/tensorflow/image_recognition/keras_models/inception_resnet_v2/quantization/ptq/prepare_model.py @@ -1,35 +1,35 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import tensorflow as tf -def get_inception_resnet_v2_model(saved_path): - model = tf.keras.applications.InceptionResNetV2(weights='imagenet') - model.save(saved_path) - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export pretained keras model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--output_model', - type=str, - help='path to exported model file') - - args = parser.parse_args() - get_inception_resnet_v2_model(args.output_model) +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import tensorflow as tf +def get_inception_resnet_v2_model(saved_path): + model = tf.keras.applications.InceptionResNetV2(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_inception_resnet_v2_model(args.output_model) diff --git a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/README.md index 3897c169305..1b5bb6a98e5 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/README.md @@ -1,86 +1,86 @@ -Step-by-Step -============ - -This document is used to enable Tensorflow Keras inception_v3 model quantization and benchmark using Intel® Neural Compressor. -This example can run on Intel CPUs and GPUs. - - -# Prerequisite - -## 1. Environment - -### Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -### Install Tensorflow -```shell -pip install tensorflow -``` -> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). - -### Install Intel Extension for Tensorflow - -#### Quantizing the model on Intel GPU -Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[gpu] -``` -Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). 
- -#### Quantizing the model on Intel CPU(Experimental) -Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2. Prepare Pretrained model - -The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: - ``` -python prepare_model.py --output_model=/path/to/model - ``` -`--output_model ` the model should be saved as SavedModel format or H5 format. - - -## 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. - - ```shell - cd examples/tensorflow/image_recognition/keras_models/ - # convert validation subset - bash prepare_dataset.sh --output_dir=/inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_dataset.sh --output_dir=/inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd inception_v3/quantization/ptq - ``` - -# Run Command - -## Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... - ) -``` - -## Quantization - ```shell - bash run_tuning.sh --input_model=./inception_v3_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset - ``` - -## Benchmark - ```shell - bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=performance --batch_size=1 - bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=accuracy --batch_size=32 - ``` +Step-by-Step +============ + +This document is used to enable Tensorflow Keras inception_v3 model quantization and benchmark using Intel® Neural Compressor. +This example can run on Intel CPUs and GPUs. + + +# Prerequisite + +## 1. Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +### Install Tensorflow +```shell +pip install tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### Install Intel Extension for Tensorflow + +#### Quantizing the model on Intel GPU +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[gpu] +``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. 
+For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). + +#### Quantizing the model on Intel CPU(Experimental) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model + +The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: + ``` +python prepare_model.py --output_model=/path/to/model + ``` +`--output_model ` the model should be saved as SavedModel format or H5 format. + + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. + + ```shell + cd examples/tensorflow/image_recognition/keras_models/ + # convert validation subset + bash prepare_dataset.sh --output_dir=/inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation + # convert train subset + bash prepare_dataset.sh --output_dir=/inception_v3/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train + cd inception_v3/quantization/ptq + ``` + +# Run Command + +## Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` + +## Quantization + ```shell + bash run_tuning.sh --input_model=./inception_v3_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset + ``` + +## Benchmark + ```shell + bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=performance --batch_size=1 + bash run_benchmark.sh --input_model=./result --dataset_location=/path/to/evaluation/dataset --mode=accuracy --batch_size=32 + ``` diff --git a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/main.py index 35de045f432..6de44621a1b 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/main.py +++ b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/main.py @@ -1,142 +1,142 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -import time -import numpy as np -from neural_compressor import data -import tensorflow as tf -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - -flags = tf.compat.v1.flags -FLAGS = flags.FLAGS - -## Required parameters -flags.DEFINE_string( - 'input_model', None, 'Run inference with specified keras model.') - -flags.DEFINE_string( - 'output_model', None, 'The output quantized model.') - -flags.DEFINE_string( - 'mode', 'performance', 'define benchmark mode for accuracy or performance') - -flags.DEFINE_bool( - 'tune', False, 'whether to tune the model') - -flags.DEFINE_bool( - 'benchmark', False, 'whether to benchmark the model') - -flags.DEFINE_string( - 'calib_data', None, 'location of calibration dataset') - -flags.DEFINE_string( - 'eval_data', None, 'location of evaluate dataset') - -flags.DEFINE_integer( - 'batch_size', 32, 'batch_size of evaluation') - -flags.DEFINE_integer( - 'iters', 100, 'maximum iteration when evaluating performance') - -from neural_compressor.metric import TensorflowTopK -from neural_compressor.data import TensorflowImageRecord -from neural_compressor.data import DefaultDataLoader -from neural_compressor.data import ComposeTransform -from neural_compressor.data import LabelShift -from neural_compressor.data import BilinearImagenetTransform - -eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=299, width=299)])) -if FLAGS.benchmark and FLAGS.mode == 'performance': - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) -else: - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) -if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ - [BilinearImagenetTransform(height=299, width=299)])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model): - """Custom evaluate function to inference the model for specified metric on validation dataset. - - Args: - model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - - Returns: - accuracy (float): evaluation result, the larger is better. 
- """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] - postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) - - def eval_func(dataloader, metric): - warmup = 5 - iteration = None - latency_list = [] - if FLAGS.benchmark and FLAGS.mode == 'performance': - iteration = FLAGS.iters - for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) - start = time.time() - predictions = infer(input_tensor)[output_name] - end = time.time() - predictions = predictions.numpy() - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - latency_list.append(end - start) - if iteration and idx >= iteration: - break - latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size - return latency - - latency = eval_func(eval_dataloader, metric) - if FLAGS.benchmark and FLAGS.mode == 'performance': - print("Batch size = {}".format(eval_dataloader.batch_size)) - print("Latency: {:.3f} ms".format(latency * 1000)) - print("Throughput: {:.3f} images/sec".format(1. / latency)) - acc = metric.result() - return acc - -def main(_): - from neural_compressor.utils import set_random_seed - set_random_seed(9527) - if FLAGS.tune: - from neural_compressor import quantization - from neural_compressor.config import PostTrainingQuantConfig - conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - q_model = quantization.fit(FLAGS.input_model, conf=conf, calib_dataloader=calib_dataloader, - eval_func=evaluate) - q_model.save(FLAGS.output_model) - - if FLAGS.benchmark: - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - if FLAGS.mode == 'performance': - conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) - fit(FLAGS.input_model, conf, b_func=evaluate) - else: - from neural_compressor.model import Model - model = Model(FLAGS.input_model).model - accuracy = evaluate(model) - print('Batch size = %d' % FLAGS.batch_size) - print("Accuracy: %.5f" % accuracy) - -if __name__ == "__main__": - tf.compat.v1.app.run() +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import time +import numpy as np +from neural_compressor import data +import tensorflow as tf +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + 'input_model', None, 'Run inference with specified keras model.') + +flags.DEFINE_string( + 'output_model', None, 'The output quantized model.') + +flags.DEFINE_string( + 'mode', 'performance', 'define benchmark mode for accuracy or performance') + +flags.DEFINE_bool( + 'tune', False, 'whether to tune the model') + +flags.DEFINE_bool( + 'benchmark', False, 'whether to benchmark the model') + +flags.DEFINE_string( + 'calib_data', None, 'location of calibration dataset') + +flags.DEFINE_string( + 'eval_data', None, 'location of evaluate dataset') + +flags.DEFINE_integer( + 'batch_size', 32, 'batch_size of evaluation') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +from neural_compressor.metric import TensorflowTopK +from neural_compressor.data import TensorflowImageRecord +from neural_compressor.data import DefaultDataLoader +from neural_compressor.data import ComposeTransform +from neural_compressor.data import LabelShift +from neural_compressor.data import BilinearImagenetTransform + +eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=299, width=299)])) +if FLAGS.benchmark and FLAGS.mode == 'performance': + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) +else: + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) +if FLAGS.calib_data: + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ + [BilinearImagenetTransform(height=299, width=299)])) + calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). + + Returns: + accuracy (float): evaluation result, the larger is better. + """ + infer = model.signatures["serving_default"] + output_dict_keys = infer.structured_outputs.keys() + output_name = list(output_dict_keys )[0] + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + latency_list = [] + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + inputs = np.array(inputs) + input_tensor = tf.constant(inputs) + start = time.time() + predictions = infer(input_tensor)[output_name] + end = time.time() + predictions = predictions.numpy() + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + latency_list.append(end - start) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark and FLAGS.mode == 'performance': + print("Batch size = {}".format(eval_dataloader.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. 
/ latency)) + acc = metric.result() + return acc + +def main(_): + from neural_compressor.utils import set_random_seed + set_random_seed(9527) + if FLAGS.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + q_model = quantization.fit(FLAGS.input_model, conf=conf, calib_dataloader=calib_dataloader, + eval_func=evaluate) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) + fit(FLAGS.input_model, conf, b_func=evaluate) + else: + from neural_compressor.model import Model + model = Model(FLAGS.input_model).model + accuracy = evaluate(model) + print('Batch size = %d' % FLAGS.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/prepare_model.py index fc6119d5c50..abf63dc93b4 100644 --- a/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/prepare_model.py +++ b/examples/tensorflow/image_recognition/keras_models/inception_v3/quantization/ptq/prepare_model.py @@ -1,35 +1,35 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -from tensorflow.keras.applications.inception_v3 import InceptionV3 -def get_inception_v3_model(saved_path): - model = InceptionV3(weights='imagenet') - model.save(saved_path) - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export pretained keras model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--output_model', - type=str, - help='path to exported model file') - - args = parser.parse_args() - get_inception_v3_model(args.output_model) +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +from tensorflow.keras.applications.inception_v3 import InceptionV3 +def get_inception_v3_model(saved_path): + model = InceptionV3(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_inception_v3_model(args.output_model) diff --git a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/README.md index 41607ccf9b7..8c312d0fb12 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/README.md +++ b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/README.md @@ -1,85 +1,85 @@ -Step-by-Step -============ - -This document is used to enable Tensorflow Keras resnet101 model quantization and benchmark using Intel® Neural Compressor. -This example can run on Intel CPUs and GPUs. - - -# Prerequisite - -## 1. Environment - -### Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` - -### Install Tensorflow -```shell -pip install tensorflow -``` -> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). - -### Install Intel Extension for Tensorflow -#### Quantizing the model on Intel GPU -Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[gpu] -``` -Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). - -#### Quantizing the model on Intel CPU(Experimental) -Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` - -## 2. Prepare Pretrained model - -The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: - ``` -python prepare_model.py --output_model=/path/to/model - ``` -`--output_model ` the model should be saved as SavedModel format or H5 format. - -## 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. 
- - ```shell - cd examples/tensorflow/image_recognition/keras_models/ - # convert validation subset - bash prepare_dataset.sh --output_dir=/resnet101/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_dataset.sh --output_dir=/resnet101/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd resnet101/quantization/ptq - ``` - -# Run Command - -## Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... - ) -``` - -## Quantization - ```shell - bash run_tuning.sh --input_model=./resnet101_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset - ``` - -## Benchmark - ```shell - bash run_benchmark.sh --input_model=./result --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=32 - bash run_benchmark.sh --input_model=./result --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 +Step-by-Step +============ + +This document is used to enable Tensorflow Keras resnet101 model quantization and benchmark using Intel® Neural Compressor. +This example can run on Intel CPUs and GPUs. + + +# Prerequisite + +## 1. Environment + +### Installation +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` + +### Install Tensorflow +```shell +pip install tensorflow +``` +> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). + +### Install Intel Extension for Tensorflow +#### Quantizing the model on Intel GPU +Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[gpu] +``` +Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. +For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). + +#### Quantizing the model on Intel CPU(Experimental) +Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. + +```shell +pip install --upgrade intel-extension-for-tensorflow[cpu] +``` + +## 2. Prepare Pretrained model + +The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: + ``` +python prepare_model.py --output_model=/path/to/model + ``` +`--output_model ` the model should be saved as SavedModel format or H5 format. + +## 3. Prepare Dataset + + TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. + We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. 
If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directories based on the label (synset) of each image, you can use the command below to convert it to the TF records format.
+
+  ```shell
+  cd examples/tensorflow/image_recognition/keras_models/
+  # convert validation subset
+  bash prepare_dataset.sh --output_dir=/resnet101/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation
+  # convert train subset
+  bash prepare_dataset.sh --output_dir=/resnet101/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train
+  cd resnet101/quantization/ptq
+  ```
+
+# Run Command
+
+## Quantization Config
+The Quantization Config class has default parameter settings for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'.
+
+```
+config = PostTrainingQuantConfig(
+        device="gpu",
+        backend="itex",
+        ...
+        )
+```
+
+## Quantization
+  ```shell
+  bash run_tuning.sh --input_model=./resnet101_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset
+  ```
+
+## Benchmark
+  ```shell
+  bash run_benchmark.sh --input_model=./result --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=32
+  bash run_benchmark.sh --input_model=./result --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1
 ```
\ No newline at end of file
diff --git a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/main.py b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/main.py
index 8b44d4cf0ce..987f47070ac 100644
--- a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/main.py
+++ b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/main.py
@@ -1,142 +1,142 @@
-#
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2022 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# -import time -import numpy as np -from neural_compressor import data -import tensorflow as tf -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - -flags = tf.compat.v1.flags -FLAGS = flags.FLAGS - -## Required parameters -flags.DEFINE_string( - 'input_model', None, 'Run inference with specified keras model.') - -flags.DEFINE_string( - 'output_model', None, 'The output quantized model.') - -flags.DEFINE_string( - 'mode', 'performance', 'define benchmark mode for accuracy or performance') - -flags.DEFINE_bool( - 'tune', False, 'whether to tune the model') - -flags.DEFINE_bool( - 'benchmark', False, 'whether to benchmark the model') - -flags.DEFINE_string( - 'calib_data', None, 'location of calibration dataset') - -flags.DEFINE_string( - 'eval_data', None, 'location of evaluate dataset') - -flags.DEFINE_integer( - 'batch_size', 32, 'batch_size of evaluation') - -flags.DEFINE_integer( - 'iters', 100, 'maximum iteration when evaluating performance') - -from neural_compressor.metric import TensorflowTopK -from neural_compressor.data import TensorflowImageRecord -from neural_compressor.data import DefaultDataLoader -from neural_compressor.data import ComposeTransform -from neural_compressor.data import LabelShift -from neural_compressor.data import TensorflowResizeCropImagenetTransform - -eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ - [TensorflowResizeCropImagenetTransform(height=224, width=224, mean_value=[123.68, 116.78, 103.94])])) -if FLAGS.benchmark and FLAGS.mode == 'performance': - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) -else: - eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) -if FLAGS.calib_data: - calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ - [TensorflowResizeCropImagenetTransform(height=224, width=224, mean_value=[123.68, 116.78, 103.94])])) - calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) - -def evaluate(model): - """Custom evaluate function to inference the model for specified metric on validation dataset. - - Args: - model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). - - Returns: - accuracy (float): evaluation result, the larger is better. 
- """ - infer = model.signatures["serving_default"] - output_dict_keys = infer.structured_outputs.keys() - output_name = list(output_dict_keys )[0] - postprocess = LabelShift(label_shift=1) - metric = TensorflowTopK(k=1) - - def eval_func(dataloader, metric): - warmup = 5 - iteration = None - latency_list = [] - if FLAGS.benchmark and FLAGS.mode == 'performance': - iteration = FLAGS.iters - for idx, (inputs, labels) in enumerate(dataloader): - inputs = np.array(inputs) - input_tensor = tf.constant(inputs) - start = time.time() - predictions = infer(input_tensor)[output_name] - end = time.time() - predictions = predictions.numpy() - predictions, labels = postprocess((predictions, labels)) - metric.update(predictions, labels) - latency_list.append(end - start) - if iteration and idx >= iteration: - break - latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size - return latency - - latency = eval_func(eval_dataloader, metric) - if FLAGS.benchmark and FLAGS.mode == 'performance': - print("Batch size = {}".format(eval_dataloader.batch_size)) - print("Latency: {:.3f} ms".format(latency * 1000)) - print("Throughput: {:.3f} images/sec".format(1. / latency)) - acc = metric.result() - return acc - -def main(_): - from neural_compressor.utils import set_random_seed - set_random_seed(9527) - if FLAGS.tune: - from neural_compressor import quantization - from neural_compressor.config import PostTrainingQuantConfig - conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) - q_model = quantization.fit(FLAGS.input_model, conf=conf, calib_dataloader=calib_dataloader, - eval_func=evaluate) - q_model.save(FLAGS.output_model) - - if FLAGS.benchmark: - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - if FLAGS.mode == 'performance': - conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) - fit(FLAGS.input_model, conf, b_func=evaluate) - else: - from neural_compressor.model import Model - model = Model(FLAGS.input_model).model - accuracy = evaluate(model) - print('Batch size = %d' % FLAGS.batch_size) - print("Accuracy: %.5f" % accuracy) - -if __name__ == "__main__": - tf.compat.v1.app.run() +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import time +import numpy as np +from neural_compressor import data +import tensorflow as tf +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +flags = tf.compat.v1.flags +FLAGS = flags.FLAGS + +## Required parameters +flags.DEFINE_string( + 'input_model', None, 'Run inference with specified keras model.') + +flags.DEFINE_string( + 'output_model', None, 'The output quantized model.') + +flags.DEFINE_string( + 'mode', 'performance', 'define benchmark mode for accuracy or performance') + +flags.DEFINE_bool( + 'tune', False, 'whether to tune the model') + +flags.DEFINE_bool( + 'benchmark', False, 'whether to benchmark the model') + +flags.DEFINE_string( + 'calib_data', None, 'location of calibration dataset') + +flags.DEFINE_string( + 'eval_data', None, 'location of evaluate dataset') + +flags.DEFINE_integer( + 'batch_size', 32, 'batch_size of evaluation') + +flags.DEFINE_integer( + 'iters', 100, 'maximum iteration when evaluating performance') + +from neural_compressor.metric import TensorflowTopK +from neural_compressor.data import TensorflowImageRecord +from neural_compressor.data import DefaultDataLoader +from neural_compressor.data import ComposeTransform +from neural_compressor.data import LabelShift +from neural_compressor.data import TensorflowResizeCropImagenetTransform + +eval_dataset = TensorflowImageRecord(root=FLAGS.eval_data, transform=ComposeTransform(transform_list= \ + [TensorflowResizeCropImagenetTransform(height=224, width=224, mean_value=[123.68, 116.78, 103.94])])) +if FLAGS.benchmark and FLAGS.mode == 'performance': + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=1) +else: + eval_dataloader = DefaultDataLoader(dataset=eval_dataset, batch_size=FLAGS.batch_size) +if FLAGS.calib_data: + calib_dataset = TensorflowImageRecord(root=FLAGS.calib_data, transform=ComposeTransform(transform_list= \ + [TensorflowResizeCropImagenetTransform(height=224, width=224, mean_value=[123.68, 116.78, 103.94])])) + calib_dataloader = DefaultDataLoader(dataset=calib_dataset, batch_size=10) + +def evaluate(model): + """Custom evaluate function to inference the model for specified metric on validation dataset. + + Args: + model (tf.saved_model.load): The input model will be the class of tf.saved_model.load(quantized_model_path). + + Returns: + accuracy (float): evaluation result, the larger is better. 
+ """ + infer = model.signatures["serving_default"] + output_dict_keys = infer.structured_outputs.keys() + output_name = list(output_dict_keys )[0] + postprocess = LabelShift(label_shift=1) + metric = TensorflowTopK(k=1) + + def eval_func(dataloader, metric): + warmup = 5 + iteration = None + latency_list = [] + if FLAGS.benchmark and FLAGS.mode == 'performance': + iteration = FLAGS.iters + for idx, (inputs, labels) in enumerate(dataloader): + inputs = np.array(inputs) + input_tensor = tf.constant(inputs) + start = time.time() + predictions = infer(input_tensor)[output_name] + end = time.time() + predictions = predictions.numpy() + predictions, labels = postprocess((predictions, labels)) + metric.update(predictions, labels) + latency_list.append(end - start) + if iteration and idx >= iteration: + break + latency = np.array(latency_list[warmup:]).mean() / eval_dataloader.batch_size + return latency + + latency = eval_func(eval_dataloader, metric) + if FLAGS.benchmark and FLAGS.mode == 'performance': + print("Batch size = {}".format(eval_dataloader.batch_size)) + print("Latency: {:.3f} ms".format(latency * 1000)) + print("Throughput: {:.3f} images/sec".format(1. / latency)) + acc = metric.result() + return acc + +def main(_): + from neural_compressor.utils import set_random_seed + set_random_seed(9527) + if FLAGS.tune: + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + conf = PostTrainingQuantConfig(calibration_sampling_size=[50, 100]) + q_model = quantization.fit(FLAGS.input_model, conf=conf, calib_dataloader=calib_dataloader, + eval_func=evaluate) + q_model.save(FLAGS.output_model) + + if FLAGS.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=7) + fit(FLAGS.input_model, conf, b_func=evaluate) + else: + from neural_compressor.model import Model + model = Model(FLAGS.input_model).model + accuracy = evaluate(model) + print('Batch size = %d' % FLAGS.batch_size) + print("Accuracy: %.5f" % accuracy) + +if __name__ == "__main__": + tf.compat.v1.app.run() diff --git a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/prepare_model.py b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/prepare_model.py index 552a0942157..3c16f5353be 100644 --- a/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/prepare_model.py +++ b/examples/tensorflow/image_recognition/keras_models/resnet101/quantization/ptq/prepare_model.py @@ -1,35 +1,35 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import argparse -import tensorflow as tf -def get_resnet101_model(saved_path): - model = tf.keras.applications.ResNet101(weights='imagenet') - model.save(saved_path) - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export pretained keras model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--output_model', - type=str, - help='path to exported model file') - - args = parser.parse_args() - get_resnet101_model(args.output_model) +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +import tensorflow as tf +def get_resnet101_model(saved_path): + model = tf.keras.applications.ResNet101(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_resnet101_model(args.output_model) diff --git a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/README.md b/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/README.md deleted file mode 100644 index bacddbc01c4..00000000000 --- a/examples/tensorflow/image_recognition/keras_models/xception/quantization/ptq/README.md +++ /dev/null @@ -1,82 +0,0 @@ -Step-by-Step -============ - -This document is used to enable Tensorflow Keras xception model quantization and benchmark using Intel® Neural Compressor. -This example can run on Intel CPUs and GPUs. - - -# Prerequisite - -## 1. Environment - -### Installation -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -### Install Intel Tensorflow -```shell -pip install intel-tensorflow -``` -> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). - -### Install Intel Extension for Tensorflow -#### Quantizing the model on Intel GPU -Intel Extension for Tensorflow is mandatory to be installed for quantizing the model on Intel GPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[gpu] -``` -Please refer to the [Installation Guides](https://dgpu-docs.intel.com/installation-guides/ubuntu/ubuntu-focal-dc.html) for latest Intel GPU driver installation. -For any more details, please follow the procedure in [install-gpu-drivers](https://github.com/intel-innersource/frameworks.ai.infrastructure.intel-extension-for-tensorflow.intel-extension-for-tensorflow/blob/master/docs/install/install_for_gpu.md#install-gpu-drivers). - -#### Quantizing the model on Intel CPU(Experimental) -Intel Extension for Tensorflow for Intel CPUs is experimental currently. It's not mandatory for quantizing the model on Intel CPUs. - -```shell -pip install --upgrade intel-extension-for-tensorflow[cpu] -``` -## 2. 
Prepare Pretrained model - -The pretrained model is provided by [Keras Applications](https://keras.io/api/applications/). prepare the model, Run as follow: - ``` -python prepare_model.py --output_model=/path/to/model - ``` -`--output_model ` the model should be saved as SavedModel format or H5 format. - -## 3. Prepare Dataset - - TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. - We also prepared related scripts in `imagenet_prepare` directory. To download the raw images, the user must create an account with image-net.org. If you have downloaded the raw data and preprocessed the validation data by moving the images into the appropriate sub-directory based on the label (synset) of the image. we can use below command ro convert it to tf records format. - - ```shell - cd examples/tensorflow/image_recognition/keras_models/ - # convert validation subset - bash prepare_dataset.sh --output_dir=/xception/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/val/ --subset=validation - # convert train subset - bash prepare_dataset.sh --output_dir=/xception/quantization/ptq/data --raw_dir=/PATH/TO/img_raw/train/ --subset=train - cd xception/quantization/ptq - ``` - -# Run Command - -## Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... - ) -``` - -## Quantization - ```shell - bash run_tuning.sh --input_model=./xception_keras/ --output_model=./result --dataset_location=/path/to/evaluation/dataset - ``` - -## Benchmark - ```shell - bash run_benchmark.sh --input_model=./result --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=32 - bash run_benchmark.sh --input_model=./result --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=1 diff --git a/neural_compressor/adaptor/keras.py b/neural_compressor/adaptor/keras.py index 156f451a390..770178429ba 100644 --- a/neural_compressor/adaptor/keras.py +++ b/neural_compressor/adaptor/keras.py @@ -34,7 +34,9 @@ def _add_supported_quantized_objects(custom_objects): """Map all the quantized objects.""" from neural_compressor.adaptor.keras_utils.quantizer import Quantize, DeQuantize - from neural_compressor.adaptor.keras_utils.quantizer import FakeQuant, QConv2D, QDense + from neural_compressor.adaptor.keras_utils.quantizer import FakeQuant + from neural_compressor.adaptor.keras_utils.conv2d import QConv2D + from neural_compressor.adaptor.keras_utils.dense import QDense custom_objects["Quantize"] = Quantize custom_objects["DeQuantize"] = DeQuantize custom_objects["FakeQuant"] = FakeQuant @@ -106,6 +108,93 @@ def tuning_cfg_to_fw(self, tuning_cfg): weight_bit) self.fp32_ops = fp32_ops + def _pre_optimize(self, model): + model = self._fuse_bn(model) + return model + + def _fuse_bn(self, model): + keras_object = model._model_object + json_model = copy.deepcopy(json.loads(keras_object.to_json())) + config = json_model["config"] + fp32_layers = config["layers"] + def fuse_conv_bn(conv_weight, bn_weight, eps=1.0e-5): + gamma = bn_weight[0].reshape(1, 1, 1, bn_weight[0].shape[0]) + beta = bn_weight[1] + mean = 
bn_weight[2] + var = bn_weight[3].reshape(1, 1, 1, bn_weight[3].shape[0]) + scale_value = gamma / np.sqrt(var + eps) + weight = conv_weight[0] * scale_value + if len(conv_weight) == 1: + bias = np.zeros_like(mean) + else: + bias = conv_weight[1] + bias = beta + (bias - mean) * scale_value + bias = bias.reshape(-1) + return [weight, bias] + + bn_node_map = {} + for idx, layer in enumerate(copy.deepcopy(fp32_layers)): + layer_config = layer['config'] + if layer['class_name'] in ['BatchNormalization'] and 'inbound_nodes' in layer: + bn_node_map[layer['name']] = layer + + fuse_layers = [] + fold_conv = [] + for idx, layer in enumerate(copy.deepcopy(fp32_layers)): + layer_config = layer['config'] + if 'inbound_nodes' in layer: + if layer['class_name'] in ['BatchNormalization']: + bn_inbound_node = bn_node_map[layer_config['name']]['inbound_nodes'][0][0] + if bn_inbound_node[0] in self.conv_weights.keys(): + conv_weight = self.conv_weights[bn_inbound_node[0]] + bn_weight = self.bn_weights[layer_config['name']] + if len(bn_weight) > 3: + self.layer_weights[bn_inbound_node[0]] = \ + fuse_conv_bn(conv_weight, bn_weight) + fold_conv.append(bn_inbound_node[0]) + else: + fuse_layers.append(layer) + # (TODO) DepthwiseConv BN fuse + else: + fuse_layers.append(layer) + elif len(layer['inbound_nodes']): + new_bound_nodes = [] + for bound_node in layer['inbound_nodes'][0]: + if bound_node[0] in self.bn_weights.keys(): + bn_inbound_node = bn_node_map[bound_node[0]]['inbound_nodes'][0][0] + if bn_inbound_node[0] in self.conv_weights.keys(): + new_bound_nodes.append(bn_inbound_node) + else: + new_bound_nodes.append(bound_node) + else: + new_bound_nodes.append(bound_node) + layer['inbound_nodes'] = [new_bound_nodes] + fuse_layers.append(layer) + else: + fuse_layers.append(layer) + else: + if idx > 0 and layer['class_name'] in ['BatchNormalization'] and \ + fp32_layers[idx - 1]['class_name'] in ['Conv2D']: + conv_name = fp32_layers[idx - 1]['config']['name'] + conv_weight = self.conv_weights[conv_name] + bn_weight = self.bn_weights[layer_config['name']] + self.layer_weights[conv_name] = fuse_conv_bn(conv_weight, bn_weight) + fold_conv.append(conv_name) + else: + fuse_layers.append(layer) + + # bn folding will have a shift bias + for idx, layer in enumerate(fuse_layers): + layer_config = layer['config'] + if layer['class_name'] in ['Conv2D'] and layer_config['name'] in fold_conv: + layer_config['use_bias'] = True + + json_model['config']['layers'] = fuse_layers + fused_model = self._restore_model_from_json(json_model) + from neural_compressor.model.keras_model import KerasModel + fused_model = KerasModel(fused_model) + return fused_model + @dump_elapsed_time("Pass quantize model") def quantize(self, tune_cfg, model, dataloader, q_func=None): '''Execute the quantize process on the specified model. @@ -145,22 +234,31 @@ def quantize(self, tune_cfg, model, dataloader, q_func=None): "So the real sampling size is {}.". 
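The folding performed by `fuse_conv_bn` above uses the standard identity W' = W·γ/√(σ²+ε) and b' = β + (b − μ)·γ/√(σ²+ε). As an illustrative aside (not part of the patch), a minimal NumPy sketch can confirm that a folded 1x1 convolution reproduces Conv2D followed by BatchNormalization on toy data:

```python
import numpy as np

rng = np.random.default_rng(0)
eps = 1.0e-5

# Toy shapes: 1x1 conv, 3 input channels, 4 output channels (HWIO layout).
w = rng.standard_normal((1, 1, 3, 4)).astype(np.float32)
b = rng.standard_normal(4)
gamma, beta = rng.standard_normal(4), rng.standard_normal(4)
mean, var = rng.standard_normal(4), rng.random(4) + 0.5

x = rng.standard_normal((8, 3))            # 8 "pixels", 3 channels

# Reference: conv (a matmul for a 1x1 kernel) followed by batch norm.
conv = x @ w.reshape(3, 4) + b
ref = gamma * (conv - mean) / np.sqrt(var + eps) + beta

# Folded: scale the kernel, shift the bias, then drop the BN layer.
scale = gamma / np.sqrt(var + eps)
w_fold = w * scale                          # broadcasts over the output-channel axis
b_fold = beta + (b - mean) * scale
fold = x @ w_fold.reshape(3, 4) + b_fold

assert np.allclose(ref, fold, atol=1e-5)
```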
format(calib_sampling_size, dataloader.batch_size, dataloader.batch_size * iter)) + q_layers = [] - for idx, layer in enumerate(self.fp32_layers): + self.inbound_nodes_map = {} + for idx, layer in enumerate(copy.deepcopy(self.fp32_layers)): layer_config = layer["config"] if layer["class_name"] in ["Conv2D", "Dense"] and \ layer['config']['name'] in self.quantize_config['op_wise_config']: op_config = self.quantize_config['op_wise_config'][layer['config']['name']] mode = 'per_channel' if op_config[0] else 'per_tensor' - #(TODO) support asym/sym - fake_quant_name = 'fake_quant_' + str(idx) - q_layers.append({'class_name': 'FakeQuant', - 'config': {'mode': 'per_tensor', 'name': fake_quant_name}}) + fake_q_name = 'fake_quant_' + str(idx) + fake_q_layer = {'class_name': 'FakeQuant', + 'name': fake_q_name, + 'config': {'mode': 'per_tensor', 'name': fake_q_name}, + } + if 'inbound_nodes' in layer: + fake_q_layer['inbound_nodes'] = layer['inbound_nodes'] + layer['inbound_nodes'] = [[[fake_q_name, 0, 0, {}]]] + self.inbound_nodes_map[fake_q_name] = layer + + q_layers.append(fake_q_layer) q_layers.append(layer) else: q_layers.append(layer) - keras_object = model._model_object + keras_object = self.pre_optimized_model._model_object json_model = copy.deepcopy(json.loads(keras_object.to_json())) json_model['config']['layers'] = q_layers quantized_model = self._restore_model_from_json(json_model) @@ -199,40 +297,85 @@ def _calibrate(self, model, dataloader, calib_interation): config = json_model["config"] layers = config["layers"] q_layers = [] - for layer in layers: + # quantize_mode = self._check_quantize_mode(json_model) + inbound_reverse_map = {} + for idx, layer in enumerate(layers): layer_config = copy.deepcopy(layer['config']) if layer['class_name'] == 'FakeQuant': min_value = min(results[layer['config']['name']]['min']) max_value = max(results[layer['config']['name']]['max']) - q_layers.append({'class_name': 'Quantize', - 'config': {'min_range': min_value, - 'max_range': max_value, - }}) - q_layers.append({'class_name': 'DeQuantize', - 'config': {'min_range': min_value, - 'max_range': max_value, - }}) - elif layer['class_name'] == 'Conv2D' or layer['class_name'] == 'Dense': + quantize_layer = {'class_name': 'Quantize', + 'name': 'quantize_' + str(idx), + 'config': {'min_range': min_value, + 'max_range': max_value, + # 'mode': quantize_mode, + 'name': 'quantize_' + str(idx), + }} + dequantize_layer = {'class_name': 'DeQuantize', + 'name': 'dequantize_' + str(idx), + 'config': {'min_range': min_value, + 'max_range': max_value, + # 'mode': quantize_mode, + 'name': 'dequantize_' + str(idx), + }} + if 'inbound_nodes' in layer: + quantize_layer['inbound_nodes'] = layer['inbound_nodes'] + dequantize_layer['inbound_nodes'] = [[['quantize_' + str(idx), 0, 0, {}]]] + # find the conv/dense layer from fake quant map and + # change the conv/dense node inbound to dequantize + layer_name = self.inbound_nodes_map[layer['name']]['name'] + inbound_reverse_map[layer_name] = [[['dequantize_' + str(idx), 0, 0, {}]]] + + q_layers.append(quantize_layer) + q_layers.append(dequantize_layer) + elif layer['class_name'] in ['Conv2D', 'Dense'] and \ + layer['config']['name'] in self.quantize_config['op_wise_config']: # index 0 is weight, index 1 is bias q_layer_name = 'Q' + layer['class_name'] + # this is for inbounds search + q_name = layer['config']['name'] kernel = self.layer_weights[layer['config']['name']][0] - layer_config['min_value'] = str(kernel.min()) - layer_config['max_value'] = str(kernel.max()) - 
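For functional (non-Sequential) models, the splice above works purely on the layer dicts taken from `model.to_json()`: the new FakeQuant entry inherits the Conv2D/Dense layer's producers, and the layer itself is rewired to consume the FakeQuant output. A toy sketch of that rewiring with hypothetical layer names (illustrative only, not part of the patch):

```python
# A simplified functional-model layer entry, as found in model.to_json().
conv = {'class_name': 'Conv2D',
        'name': 'conv1',
        'config': {'name': 'conv1'},
        'inbound_nodes': [[['input_1', 0, 0, {}]]]}

fake_q = {'class_name': 'FakeQuant',
          'name': 'fake_quant_0',
          'config': {'mode': 'per_tensor', 'name': 'fake_quant_0'},
          # FakeQuant takes over the conv's original producer ...
          'inbound_nodes': conv['inbound_nodes']}

# ... and the conv now consumes the FakeQuant output instead.
conv['inbound_nodes'] = [[['fake_quant_0', 0, 0, {}]]]

layers = [fake_q, conv]   # emitted in this order into json_model['config']['layers']
```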
q_layers.append({'class_name': q_layer_name, 'config': layer_config}) + dim = list(range(0, kernel.ndim)) + t_dim = [dim.pop(-1)] + t_dim.extend(dim) + channel_size = kernel.shape[-1] + kernel_channel = kernel.transpose(t_dim).reshape(channel_size, -1) + layer_config['min_value'] = json.dumps(\ + np.min(kernel_channel, axis=1).tolist()) + layer_config['max_value'] = json.dumps(\ + np.max(kernel_channel, axis=1).tolist()) + layer_config['name'] = q_name + q_layer = {'class_name': q_layer_name, + 'name': q_name, + 'config': layer_config} + if 'inbound_nodes' in layer: + q_layer['inbound_nodes'] = inbound_reverse_map[layer['name']] + q_layers.append(q_layer) else: - q_layers.append(layer) + q_layers.append(layer) json_model['config']['layers'] = q_layers quantized_model = self._restore_model_from_json(json_model) return quantized_model + + #(TODO) choose the properly quantize mode + def _check_quantize_mode(self, json_model): + config = json_model["config"] + layers = config["layers"] + for idx, layer in enumerate(layers): + if 'ReLU' in layer['class_name']: + return 'MIN_FIRST' + return 'SCALED' + def _restore_model_from_json(self, json_model): from tensorflow.keras.models import model_from_json custom_objects = {} # We need to keep a dictionary of custom objects as our quantized library # is not recognized by keras. custom_objects = _add_supported_quantized_objects(custom_objects) - qmodel = model_from_json(json.dumps(json_model), custom_objects=custom_objects) + json_model_file = json.dumps(json_model) + qmodel = model_from_json(json_model_file, custom_objects=custom_objects) qmodel = self._set_weights(qmodel, self.layer_weights) return qmodel @@ -303,7 +446,6 @@ def query_fw_capability(self, model): Args: model (object): The model to query quantization tuning capability. 
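The per-channel weight ranges built in `_calibrate` above come from moving the output-channel axis of the kernel to the front and flattening the rest, so each row holds every weight feeding one output channel. A standalone NumPy sketch of the same computation (shapes are hypothetical, illustrative only):

```python
import json
import numpy as np

kernel = np.random.randn(3, 3, 16, 32).astype(np.float32)   # HWIO Conv2D kernel

# Move the output-channel axis (last) to the front, flatten everything else.
dims = list(range(kernel.ndim))            # [0, 1, 2, 3]
t_dims = [dims.pop(-1)] + dims             # [3, 0, 1, 2]
per_channel = kernel.transpose(t_dims).reshape(kernel.shape[-1], -1)   # (32, 144)

min_value = json.dumps(np.min(per_channel, axis=1).tolist())   # one min per channel
max_value = json.dumps(np.max(per_channel, axis=1).tolist())   # one max per channel
# These JSON strings are stored in the QConv2D/QDense layer config and
# json.loads()-ed back inside the quantized layer's __init__.
```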
''' - self.pre_optimized_model = model fp32_config = {'weight': {'dtype': 'fp32'}, 'activation': {'dtype': 'fp32'}} int8_type = self.query_handler.get_op_types_by_precision(precision='int8') op_capability = self.query_handler.get_quantization_capability() @@ -311,18 +453,25 @@ def query_fw_capability(self, model): dense_config = copy.deepcopy(op_capability['int8']['Dense']) other_config = copy.deepcopy(op_capability['int8']['default']) - # get the layers info - keras_object = model._model_object - json_model = copy.deepcopy(json.loads(keras_object.to_json())) - config = json_model["config"] - self.fp32_layers = config["layers"] - # get fp32 layer weights + keras_object = model._model_object + self.conv_weights = {} + self.bn_weights = {} self.layer_weights = {} for layer in keras_object.layers: if layer.get_weights(): + if isinstance(layer, tf.keras.layers.Conv2D): + self.conv_weights[layer.name] = copy.deepcopy(layer.get_weights()) + elif isinstance(layer, tf.keras.layers.BatchNormalization): + self.bn_weights[layer.name] = copy.deepcopy(layer.get_weights()) self.layer_weights[layer.name] = copy.deepcopy(layer.get_weights()) + self.pre_optimized_model = self._pre_optimize(model) + keras_object = self.pre_optimized_model._model_object + json_model = copy.deepcopy(json.loads(keras_object.to_json())) + config = json_model["config"] + self.fp32_layers = config["layers"] + quantizable_op_details = OrderedDict() for details in self.fp32_layers: node_op = details['class_name'] diff --git a/neural_compressor/adaptor/keras_utils/conv2d.py b/neural_compressor/adaptor/keras_utils/conv2d.py new file mode 100644 index 00000000000..abeef818528 --- /dev/null +++ b/neural_compressor/adaptor/keras_utils/conv2d.py @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import tensorflow as tf +from tensorflow.keras import activations +from tensorflow.keras import constraints +from tensorflow.keras import initializers +from tensorflow.keras import regularizers +from keras.layers.convolutional.base_conv import Conv # pylint: disable=E0401 +from tensorflow import quantization + +class QConv2D(Conv): + def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', + data_format=None, dilation_rate=(1, 1), groups=1, activation=None, + use_bias=True, kernel_initializer='glorot_uniform', + bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, + activity_regularizer=None, kernel_constraint=None, bias_constraint=None, + min_value=-10000, max_value=10000, **kwargs): + super(QConv2D, self).__init__(rank=2, filters=filters, kernel_size=kernel_size, + strides=strides, padding=padding, data_format=data_format, + dilation_rate=dilation_rate, groups=groups, + activation=activations.get(activation), + use_bias=use_bias, kernel_initializer=initializers.get(kernel_initializer), + bias_initializer=initializers.get(bias_initializer), + kernel_regularizer=regularizers.get(kernel_regularizer), + bias_regularizer=regularizers.get(bias_regularizer), + activity_regularizer=regularizers.get(activity_regularizer), + kernel_constraint=constraints.get(kernel_constraint), + bias_constraint=constraints.get(bias_constraint), **kwargs) + self.min_value = json.loads(min_value) + self.max_value = json.loads(max_value) + + def call(self, inputs): + # add the Q/DQ here + kernel, _, _ = quantization.quantize(self.kernel, self.min_value, + self.max_value, tf.qint8, + axis=3, mode='SCALED') + kernel = quantization.dequantize(kernel, self.min_value, + self.max_value, axis=3, mode='SCALED',) + outputs = tf.keras.backend.conv2d( + inputs, + kernel, + strides=self.strides, + padding=self.padding, + data_format=self.data_format, + dilation_rate=self.dilation_rate) + + if self.use_bias: + outputs = tf.keras.backend.bias_add( + outputs, self.bias, data_format=self.data_format) + + if self.activation is not None: + return self.activation(outputs) + + return outputs + + @classmethod + def from_config(cls, config): + return cls(**config) + + diff --git a/neural_compressor/adaptor/keras_utils/dense.py b/neural_compressor/adaptor/keras_utils/dense.py new file mode 100644 index 00000000000..cf5a6bf70d4 --- /dev/null +++ b/neural_compressor/adaptor/keras_utils/dense.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
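`QConv2D.call` above keeps the kernel in fp32 but routes it through a per-output-channel quantize/dequantize pair, so inference sees int8-rounded weight values. A minimal sketch of that round trip with the public `tf.quantization` API (random kernel and shapes are assumptions, illustrative only):

```python
import tensorflow as tf

kernel = tf.random.normal([3, 3, 8, 16])              # HWIO, 16 output channels
min_v = tf.reduce_min(kernel, axis=[0, 1, 2])         # per-channel ranges, shape (16,)
max_v = tf.reduce_max(kernel, axis=[0, 1, 2])

q, out_min, out_max = tf.quantization.quantize(
    kernel, min_v, max_v, tf.qint8, axis=3, mode='SCALED')
dq = tf.quantization.dequantize(q, out_min, out_max, axis=3, mode='SCALED')

# dq is a float32 tensor close to the original kernel; the rounding error is
# what the calibration-driven ranges are meant to keep small.
print(tf.reduce_max(tf.abs(dq - kernel)))
```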
+ +import json +import tensorflow as tf +from tensorflow.keras import activations +from tensorflow.keras import backend +from tensorflow.keras import constraints +from tensorflow.keras import initializers +from tensorflow.keras import regularizers +from tensorflow.keras.layers import Dense +from tensorflow import quantization + +class QDense(Dense): + def __init__(self, + units, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + min_value=-10000, + max_value=10000, + **kwargs): + super(QDense, self).__init__( + units=units, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs) + self.min_value = json.loads(min_value) + self.max_value = json.loads(max_value) + + def call(self, inputs): + # add the Q/DQ here + kernel, _, _ = quantization.quantize(self.kernel, self.min_value, + self.max_value, tf.qint8, + axis=1, mode='SCALED',) + kernel = quantization.dequantize(kernel, self.min_value, + self.max_value, axis=1, mode='SCALED',) + outputs = tf.keras.backend.dot(inputs, kernel) + + if self.use_bias: + outputs = tf.keras.backend.bias_add(outputs, self.bias) + if self.activation is not None: + outputs = self.activation(outputs) + return outputs + diff --git a/neural_compressor/adaptor/keras_utils/quantizer.py b/neural_compressor/adaptor/keras_utils/quantizer.py index ad14f1d5f65..1fb60ebc9bc 100644 --- a/neural_compressor/adaptor/keras_utils/quantizer.py +++ b/neural_compressor/adaptor/keras_utils/quantizer.py @@ -15,35 +15,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys import numpy as np import tensorflow as tf - -from tensorflow.python.eager import context -from tensorflow.python.framework import tensor_shape -from tensorflow.python.keras import activations -from tensorflow.python.keras import backend -from tensorflow.python.keras import constraints -from tensorflow.python.keras import initializers -from tensorflow.python.keras import regularizers -from tensorflow.python.keras.engine.input_spec import InputSpec -from tensorflow.python.keras.utils import conv_utils -from tensorflow.python.keras.utils import tf_utils -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import embedding_ops -from tensorflow.python.ops import gen_math_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import sparse_ops -from tensorflow.python.ops import standard_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.util.tf_export import keras_export - from tensorflow.keras.layers import Layer -from tensorflow.python.keras.layers.convolutional import Conv -from tensorflow.python.keras.layers.core import Dense class FakeQuant(Layer): def __init__(self, mode='per_tensor', **kwargs): @@ -73,9 +47,10 @@ def get_config(self): 'name': self.name} class Quantize(Layer): - def __init__(self, min_range, max_range, T=tf.qint8, mode='SCALED', - round_mode='HALF_AWAY_FROM_ZERO', narrow_range=False, axis=None): - super(Quantize, self).__init__() + def __init__(self, min_range, max_range, T=tf.qint8, mode='SCALED', + round_mode='HALF_AWAY_FROM_ZERO', narrow_range=False, + axis=None, **kwargs): + super(Quantize, self).__init__(**kwargs) self.min_range = float(min_range) self.max_range = float(max_range) self.T = T @@ -100,156 +75,11 @@ def get_config(self): def from_config(cls, config): return cls(**config) -class QConv2D(Conv): - def __init__(self, filters, kernel_size, strides=(1, 1), padding='valid', - data_format=None, dilation_rate=(1, 1), groups=1, activation=None, - use_bias=True, kernel_initializer='glorot_uniform', - bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, - activity_regularizer=None, kernel_constraint=None, bias_constraint=None, - min_value=-10000, max_value=10000, **kwargs): - super(QConv2D, self).__init__(rank=2, filters=filters, kernel_size=kernel_size, - strides=strides, padding=padding, data_format=data_format, - dilation_rate=dilation_rate, groups=groups, - activation=activations.get(activation), - use_bias=use_bias, kernel_initializer=initializers.get(kernel_initializer), - bias_initializer=initializers.get(bias_initializer), - kernel_regularizer=regularizers.get(kernel_regularizer), - bias_regularizer=regularizers.get(bias_regularizer), - activity_regularizer=regularizers.get(activity_regularizer), - kernel_constraint=constraints.get(kernel_constraint), - bias_constraint=constraints.get(bias_constraint), **kwargs) - self.weight_quantizer = Quantize(float(min_value), float(max_value)) - self.weight_dequantizer = DeQuantize(float(min_value), float(max_value)) - - def call(self, inputs): # pragma: no cover - # This call method will be automatically decorated by tf.function in TF2 - # and will not be observed by coverage check - input_shape = inputs.shape - - if self._is_causal: # Apply causal padding to inputs for Conv1D. 
- inputs = array_ops.pad(inputs, self._compute_causal_padding(inputs)) - - # add the Q/DQ here - kernel = self.weight_quantizer(self.kernel) - kernel = self.weight_dequantizer(kernel) - outputs = self._convolution_op(inputs, kernel) - - if self.use_bias: - output_rank = outputs.shape.rank - if self.rank == 1 and self._channels_first: - # nn.bias_add does not accept a 1D input tensor. - bias = array_ops.reshape(self.bias, (1, self.filters, 1)) - outputs += bias - else: - # Handle multiple batch dimensions. - if output_rank is not None and output_rank > 2 + self.rank: - - def _apply_fn(o): - return nn.bias_add(o, self.bias, data_format=self._tf_data_format) - - outputs = conv_utils.squeeze_batch_dims( - outputs, _apply_fn, inner_rank=self.rank + 1) - else: - outputs = nn.bias_add( - outputs, self.bias, data_format=self._tf_data_format) - - if not context.executing_eagerly(): - # Infer the static output shape: - out_shape = self.compute_output_shape(input_shape) - outputs.set_shape(out_shape) - - if self.activation is not None: - return self.activation(outputs) - return outputs - -class QDense(Dense): - def __init__(self, - units, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - min_value=-10000, - max_value=10000, - **kwargs): - super(QDense, self).__init__( - units=units, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - **kwargs) - self.weight_quantizer = Quantize(float(min_value), float(max_value)) - self.weight_dequantizer = DeQuantize(float(min_value), float(max_value)) - - def call(self, inputs): # pragma: no cover - # This call method will be automatically decorated by tf.function in TF2 - # and will not be observed by coverage check - if inputs.dtype.base_dtype != self._compute_dtype_object.base_dtype: - inputs = math_ops.cast(inputs, dtype=self._compute_dtype_object) - - # add the Q/DQ here - # (TODO) we have not try sparse dense and may have issues - kernel = self.weight_quantizer(self.kernel) - kernel = self.weight_dequantizer(kernel) - rank = inputs.shape.rank - if rank == 2 or rank is None: - # We use embedding_lookup_sparse as a more efficient matmul operation for - # large sparse input tensors. The op will result in a sparse gradient, as - # opposed to sparse_ops.sparse_tensor_dense_matmul which results in dense - # gradients. This can lead to sigfinicant speedups, see b/171762937. - if isinstance(inputs, sparse_tensor.SparseTensor): - # We need to fill empty rows, as the op assumes at least one id per row. - inputs, _ = sparse_ops.sparse_fill_empty_rows(inputs, 0) - # We need to do some munging of our input to use the embedding lookup as - # a matrix multiply. We split our input matrix into separate ids and - # weights tensors. The values of the ids tensor should be the column - # indices of our input matrix and the values of the weights tensor - # can continue to the actual matrix weights. - # The column arrangement of ids and weights - # will be summed over and does not matter. See the documentation for - # sparse_ops.sparse_tensor_dense_matmul a more detailed explanation - # of the inputs to both ops. 
- ids = sparse_tensor.SparseTensor( - indices=inputs.indices, - values=inputs.indices[:, 1], - dense_shape=inputs.dense_shape) - weights = inputs - outputs = embedding_ops.embedding_lookup_sparse_v2( - kernel, ids, weights, combiner='sum') - else: - outputs = gen_math_ops.MatMul(a=inputs, b=kernel) - # Broadcast kernel to inputs. - else: - outputs = standard_ops.tensordot(inputs, kernel, [[rank - 1], [0]]) - # Reshape the output back to the original ndim of the input. - if not context.executing_eagerly(): - shape = inputs.shape.as_list() - output_shape = shape[:-1] + [kernel.shape[-1]] - outputs.set_shape(output_shape) - - if self.use_bias: - outputs = nn_ops.bias_add(outputs, self.bias) - - if self.activation is not None: - outputs = self.activation(outputs) - return outputs - class DeQuantize(Layer): def __init__(self, min_range, max_range, mode='SCALED', - narrow_range=False, axis=None): - super(DeQuantize, self).__init__() + narrow_range=False, axis=None, **kwargs): + super(DeQuantize, self).__init__(**kwargs) self.min_range = min_range self.max_range = max_range self.mode = mode diff --git a/neural_compressor/model/keras_model.py b/neural_compressor/model/keras_model.py index 1cbf50857f8..4ecf31ebdc4 100644 --- a/neural_compressor/model/keras_model.py +++ b/neural_compressor/model/keras_model.py @@ -20,7 +20,7 @@ import os from abc import abstractmethod from neural_compressor.model.base_model import BaseModel -from neural_compressor.utils.utility import LazyImport +from neural_compressor.utils.utility import LazyImport, compute_sparsity tf = LazyImport('tensorflow') class KerasModel(BaseModel): @@ -78,3 +78,76 @@ def _export( def framework(self): """Return framework.""" return 'keras' + + def get_all_weight_names(self): + """Get weight names of model. + + Returns: + list: weight names list. + """ + names = [] + for index, layer in enumerate(self.model.layers): + if len(layer.weights): + names.append(index) + return names + + def report_sparsity(self): + """Get sparsity of the model. + + Returns: + df (DataFrame): DataFrame of sparsity of each weight. + total_sparsity (float): total sparsity of model. 
+ """ + import pandas as pd + import tensorflow as tf + import numpy as np + df = pd.DataFrame(columns=['Name', 'Shape', 'NNZ (dense)', 'NNZ (sparse)', "Sparsity(%)", + 'Std', 'Mean', 'Abs-Mean']) + pd.set_option('display.precision', 2) + param_dims = [2, 4] + params_size = 0 + sparse_params_size = 0 + for index, layer in enumerate(self.model.layers): + if not len(layer.weights): + continue + # Extract just the actual parameter's name, which in this context we treat + # as its "type" + weights = layer.get_weights()[0] + if weights.ndim in param_dims: + param_size, sparse_param_size, dense_param_size = compute_sparsity( + weights) + density = dense_param_size / param_size + params_size += param_size + sparse_params_size += sparse_param_size + df.loc[len(df.index)] = ([ + index, + list(weights.shape), + dense_param_size, + sparse_param_size, + (1 - density) * 100, + np.std(weights), + np.mean(weights), + np.mean(np.abs(weights)) + ]) + + total_sparsity = sparse_params_size / params_size * 100 + + df.loc[len(df.index)] = ([ + 'Total sparsity:', + params_size, + "-", + int(sparse_params_size), + total_sparsity, + 0, 0, 0]) + + return df, total_sparsity + + @property + def input_node_names(self): + """Return input node names.""" + return [] + + @property + def output_node_names(self): + """Return output node names.""" + return [] diff --git a/neural_compressor/model/model.py b/neural_compressor/model/model.py index 6a53bb2592a..8cb3e286d13 100644 --- a/neural_compressor/model/model.py +++ b/neural_compressor/model/model.py @@ -180,9 +180,9 @@ def __new__(cls, root, **kwargs): model_type = kwargs['modelType'] else: model_type = get_model_type(root) + if model_type == 'keras': + return MODELS['keras'](root, **kwargs) model = MODELS['tensorflow'](model_type, root, **kwargs) - elif backend == 'keras': - model = MODELS['keras'](root, **kwargs) else: model = MODELS[backend](root, **kwargs) return model diff --git a/test/itex/test_keras_in_keras_out.py b/test/itex/test_keras_in_keras_out.py index 064e08aaa30..d455e3d1703 100644 --- a/test/itex/test_keras_in_keras_out.py +++ b/test/itex/test_keras_in_keras_out.py @@ -145,7 +145,7 @@ def test_keras_in_keras_out(self): for layer in model.layers: if 'quantize' in layer.name: found_quantize = True - if 'de_quantize' in layer.name: + if 'dequantize' in layer.name: found_dequantize = True self.assertEqual(found_quantize, True) self.assertEqual(found_dequantize, True)