inc_quantize_model.py
"""
Environment Setting
Enable Intel Optimized TensorFlow 2.6.0 and newer by setting environment variable TF_ENABLE_ONEDNN_OPTS=1
That will accelerate training and inference, and it's mandatory requirement of running Intel® Neural Compressor quantize Fp32 model or deploying the quantized model.
"""
import neural_compressor as inc
print("neural_compressor version {}".format(inc.__version__))
import tensorflow as tf
print("tensorflow {}".format(tf.__version__))
from neural_compressor.experimental import Quantization, common
import numpy as np
import tensorflow_datasets as tfds
# number of classes in the 'beans' dataset
class_num = 3
# input image width and height
w = h = 32
def preprocess(image, label):
    """Scale pixels to [0, 1], resize to (w, h) and one-hot encode the label."""
    image = tf.cast(image, tf.float32) / 255.0
    return tf.image.resize(image, [w, h]), tf.one_hot(label, class_num)
def load_raw_dataset():
    """Load the 'beans' dataset (train and test splits) from TensorFlow Datasets."""
    raw_datasets, raw_info = tfds.load(name='beans', with_info=True,
                                       as_supervised=True,
                                       split=['train', 'test'])
    return raw_datasets, raw_info
class Dataset(object):
    """Calibration/evaluation dataset wrapper consumed by Neural Compressor's DataLoader."""
    def __init__(self):
        datasets, _ = load_raw_dataset()
        # datasets[-1] is the 'test' split; preprocess every sample eagerly.
        self.test_dataset = [preprocess(v, l) for v, l in datasets[-1]]

    def __getitem__(self, index):
        return self.test_dataset[index]

    def __len__(self):
        return len(self.test_dataset)
def auto_tune(input_graph_path, yaml_config, batch_size):
    """Quantize the FP32 model with Neural Compressor auto-tuning and return the INT8 model."""
    quantizer = Quantization(yaml_config)
    dataset = Dataset()
    quantizer.calib_dataloader = common.DataLoader(dataset, batch_size=batch_size)
    quantizer.eval_dataloader = common.DataLoader(dataset, batch_size=batch_size)
    quantizer.model = common.Model(input_graph_path)
    q_model = quantizer.fit()
    return q_model
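
# A minimal sketch of what "vgg19.yaml" might contain for the Neural Compressor 1.x
# experimental API; the fields below are assumptions based on typical INC configs,
# not taken from this repository:
#
#   model:
#     name: vgg19
#     framework: tensorflow
#   quantization:
#     calibration:
#       sampling_size: 50
#   tuning:
#     accuracy_criterion:
#       relative: 0.01
#     exit_policy:
#       timeout: 0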
yaml_file = "vgg19.yaml"
batch_size = 32
model_fp32_path = "model_keras.fp32"
int8_pb_file = "model_pb.int8"

q_model = auto_tune(model_fp32_path, yaml_file, batch_size)
q_model.save(int8_pb_file)
print("Saved quantized model to {}".format(int8_pb_file))