Retain qnn input kernel scales (#4292)
* Add qnn conv2d attributes for input_tensor_scale and
kernel_tensor_scale.

The lowering in the tflite frontend loses the input_tensor_scale
and the kernel_tensor_scale by multiplying them together and folding
the product into the Requantize operation. This means that any graph
partitioning passes, or other passes that need this information, no
longer have it available in the qnn dialect. (A sketch of the
resulting API follows this change list.)

regards
Ramana

* Store input tensor scale and weight tensor scale for dense as well

As with conv2d, the tflite frontend drops the input tensor scale
and the weight tensor scale from the relay op. Store them as
separate fields on the op.

* Fix unintentional tab

* Rename input_tensor_scale to input_scale and kernel_tensor_scale
to kernel_scale for conv2d.

* input_tensor_scale -> input_scale, weight_tensor_scale -> weight_scale

* Rework dense test case

Use input_scale and kernel_scale.

* Be consistent in use of input_scale and kernel_scale values

* Fixup qnn conv2d tests for input_scale and kernel_scale

* Make pydoc identical between conv2d and dense for weight_tensor

* Fix up conv2d parameters to be in the same order between C++ and python

* Fix ordering of parameters for dense.

* Add input_scale and output_scale to try and satisfy the CI gods

* Delete input_scale and kernel_scale.

nn.conv2d does not define input_scale and kernel_scale, so they need
to be deleted when lowering to nn.conv2d (see the legalizations.py
hunk below and the sketch that follows it).

* Add input_scale and kernel_scale for qnn.conv2d
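
As a concrete illustration of the changes above, here is a minimal sketch of the patched qnn.conv2d and qnn.dense calls. Every shape, layout, scale, and zero point below is an illustrative assumption, not a value taken from this commit.

```python
# Minimal sketch of the patched API; all quantization values are
# illustrative assumptions.
from tvm import relay

data = relay.var("data", shape=(1, 28, 28, 8), dtype="uint8")
weight = relay.var("weight", shape=(3, 3, 8, 16), dtype="uint8")
conv = relay.qnn.op.conv2d(
    data, weight,
    input_zero_point=128, kernel_zero_point=122,
    input_scale=0.0078125, kernel_scale=0.0039,  # now retained on the op
    kernel_size=(3, 3), channels=16,
    data_layout="NHWC", kernel_layout="HWIO",
    out_dtype="int32")

fc_data = relay.var("fc_data", shape=(1, 128), dtype="uint8")
fc_weight = relay.var("fc_weight", shape=(10, 128), dtype="uint8")
fc = relay.qnn.op.dense(
    fc_data, fc_weight,
    input_zero_point=127, kernel_zero_point=127,
    input_scale=0.0625, kernel_scale=0.0078,
    units=10, out_dtype="int32")

# Downstream passes can now read the scales straight off the attributes:
print(conv.attrs.input_scale, conv.attrs.kernel_scale)
print(fc.attrs.input_scale, fc.attrs.kernel_scale)
```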
Ramana Radhakrishnan authored and zhiics committed Nov 16, 2019
1 parent 560280d commit 3ba9dd0
Showing 9 changed files with 134 additions and 7 deletions.
14 changes: 14 additions & 0 deletions include/tvm/relay/qnn/attrs.h
@@ -135,6 +135,10 @@ struct QnnConv2DAttrs : public tvm::AttrsNode<QnnConv2DAttrs> {
// Quantization related attributes.
int32_t input_zero_point;
int32_t kernel_zero_point;
// The input tensor scale and kernel tensor scales are stored
// for easy access to this information.
double input_scale;
double kernel_scale;

TVM_DECLARE_ATTRS(QnnConv2DAttrs, "relay.attrs.QnnConv2DAttrs") {
TVM_ATTR_FIELD(strides).set_default(Array<IndexExpr>({1, 1}))
@@ -177,6 +181,10 @@ struct QnnConv2DAttrs : public tvm::AttrsNode<QnnConv2DAttrs> {
.describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(kernel_zero_point)
.describe("The zero point of the kernel tensor.");
TVM_ATTR_FIELD(input_scale)
.describe("The quantization scale for the input tensor.");
TVM_ATTR_FIELD(kernel_scale)
.describe("The quantization scale for the weight tensor.");
}
};

@@ -212,6 +220,8 @@ struct QnnDenseAttrs : public tvm::AttrsNode<QnnDenseAttrs> {
// Quantization related attributes.
int32_t input_zero_point;
int32_t kernel_zero_point;
double input_scale;
double kernel_scale;

TVM_DECLARE_ATTRS(QnnDenseAttrs, "relay.attrs.QnnDenseAttrs") {
TVM_ATTR_FIELD(units)
@@ -222,6 +232,10 @@ struct QnnDenseAttrs : public tvm::AttrsNode<QnnDenseAttrs> {
.describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(kernel_zero_point)
.describe("The zero point of the kernel tensor.");
TVM_ATTR_FIELD(input_scale)
.describe("The input tensor scale.");
TVM_ATTR_FIELD(kernel_scale)
.describe("The kernel tensor scale.");
}
};

6 changes: 6 additions & 0 deletions python/tvm/relay/frontend/tflite.py
@@ -729,9 +729,13 @@ def convert_fully_connected(self, op):
weight_expr = self.exp_tab.new_const(weight_value, dtype=weight_tensor_type_str)

if input_tensor.qnn_params:
input_scale = input_tensor.qnn_params['scale']
kernel_scale = weight_tensor.qnn_params['scale']
out = _qnn.op.dense(in_expr, weight_expr,
input_zero_point=input_tensor.qnn_params['zero_point'],
kernel_zero_point=weight_tensor.qnn_params['zero_point'],
input_scale=input_scale,
kernel_scale=kernel_scale,
out_dtype='int32')
else:
out = _op.nn.dense(in_expr, weight_expr)
@@ -935,6 +939,8 @@ def convert_conv(self, op, conv_type):
qnn_conv2d_params['input_zero_point'] = input_tensor.qnn_params['zero_point']
qnn_conv2d_params['kernel_zero_point'] = weight_tensor.qnn_params['zero_point']
qnn_conv2d_params['out_dtype'] = 'int32'
qnn_conv2d_params['input_scale'] = input_tensor.qnn_params['scale']
qnn_conv2d_params['kernel_scale'] = weight_tensor.qnn_params['scale']
out = _qnn.op.conv2d(in_expr, weight_expr, **qnn_conv2d_params)
else:
out = _op.nn.conv2d(in_expr, weight_expr, **params)
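For context, the qnn_params dictionaries read in the hunks above are built earlier in the frontend from each tensor's TFLite quantization parameters. A hedged sketch of that mapping, assuming per-tensor quantization and the generated flatbuffers accessors Scale(0) and ZeroPoint(0):

```python
# Hypothetical helper showing the shape of the qnn_params dict consumed
# above; the accessor names on `tflite_quant` are assumptions, not code
# from this commit.
def make_qnn_params(tflite_quant):
    if tflite_quant is None:
        return None  # tensor is not quantized
    return {
        "scale": float(tflite_quant.Scale(0)),         # per-tensor scale
        "zero_point": int(tflite_quant.ZeroPoint(0)),  # per-tensor zero point
    }
```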
2 changes: 2 additions & 0 deletions python/tvm/relay/qnn/op/legalizations.py
@@ -88,6 +88,8 @@ def helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay_op):
new_attrs = {k : attrs[k] for k in attrs.keys()}
del new_attrs['kernel_zero_point']
del new_attrs['input_zero_point']
del new_attrs['input_scale']
del new_attrs['kernel_scale']
return relay_op(shift_data, shift_kernel, **new_attrs)

# Helper function to change dtypes to uint8 x int8. Intel VNNI instructions prefer this setting.
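The del statements above are load-bearing: relay.nn.conv2d defines no quantization keyword arguments, so forwarding the full qnn attribute dict would fail. A sketch of the same stripping as a standalone helper (the helper name and the qnn_attrs dict are hypothetical):

```python
# Strip the qnn-only fields before forwarding to nn.conv2d, which does
# not define them. Helper name and qnn_attrs dict are hypothetical.
from tvm import relay

QNN_ONLY = ("input_zero_point", "kernel_zero_point",
            "input_scale", "kernel_scale")

def forward_to_nn_conv2d(data, kernel, qnn_attrs):
    nn_attrs = {k: v for k, v in qnn_attrs.items() if k not in QNN_ONLY}
    return relay.nn.conv2d(data, kernel, **nn_attrs)

# Forwarding qnn_attrs unfiltered would instead raise something like:
#   TypeError: conv2d() got an unexpected keyword argument 'input_scale'
```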
30 changes: 29 additions & 1 deletion python/tvm/relay/qnn/op/qnn.py
@@ -189,6 +189,8 @@ def conv2d(data,
kernel,
input_zero_point,
kernel_zero_point,
input_scale,
kernel_scale,
strides=(1, 1),
padding=(0, 0),
dilation=(1, 1),
@@ -219,6 +221,16 @@
input_zero_point: int
The zero point of the data distribution.
input_scale: float
The scale for the input tensor. The scale for the input tensor is
stored purely for convenience here. See more commentary below.
kernel_scale: float
The scale for the weight tensor. The scale for the weight tensor is
stored for access to this during relay. This information is not
needed in the pass pipeline after qnn.conv2d is lowered to the
sequence of steps as in nn.conv2d. See also input_scale in Requantize.
kernel_zero_point: int
The zero point of the quantized_kernel distribution.
@@ -260,6 +272,7 @@ def conv2d(data,

return _make.conv2d(data, kernel,
input_zero_point, kernel_zero_point,
input_scale, kernel_scale,
strides, padding, dilation,
groups, channels, kernel_size,
data_layout, kernel_layout, out_layout, out_dtype)
@@ -317,6 +330,8 @@ def dense(data,
weight,
input_zero_point,
kernel_zero_point,
input_scale,
kernel_scale,
units=None,
out_dtype="int32"):
"""Qnn Dense operator.
@@ -332,6 +347,17 @@
The quantized input data to the operator.
weight : tvm.relay.Expr
The quantized weight expressions.
input_zero_point: int
The input zero point.
kernel_zero_point: int
The kernel zero point.
input_scale: float
The scale for the input tensor.
kernel_scale: float
The scale for the weight tensor. The scale for the weight tensor is
stored for access to this during relay. This information is not
needed in the pass pipeline after qnn.conv2d is lowered to the
sequence of steps as in nn.conv2d. See also input_scale in Requantize.
units : int, optional
Number of hidden units of the dense transformation.
out_dtype : str, optional
@@ -345,9 +371,11 @@

return _make.dense(data,
weight,
units,
input_zero_point,
kernel_zero_point,
input_scale,
kernel_scale,
units,
out_dtype)


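The docstring commentary above is the point of the patch: the scales ride along on the op purely so that later passes can read them. A hedged sketch of such a consumer (the visitor is hypothetical, not part of this commit):

```python
# Hypothetical pass-side consumer: walk an expression and collect the
# quantization scales now carried by qnn.conv2d / qnn.dense calls.
from tvm.relay.expr_functor import ExprVisitor

class ScaleCollector(ExprVisitor):
    def __init__(self):
        super().__init__()
        self.scales = []

    def visit_call(self, call):
        # Operator calls expose an op name; guard against calls to
        # functions or variables, which have no `name` attribute.
        if hasattr(call.op, "name") and call.op.name in ("qnn.conv2d", "qnn.dense"):
            self.scales.append((call.op.name,
                                call.attrs.input_scale,
                                call.attrs.kernel_scale))
        super().visit_call(call)

# Usage: c = ScaleCollector(); c.visit(expr); c.scales then holds the
# retained (op, input_scale, kernel_scale) triples.
```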
5 changes: 4 additions & 1 deletion src/relay/qnn/op/convolution.cc
@@ -440,7 +440,8 @@ Expr QnnConv2DCanonicalize(const Attrs& attrs, const Array<Expr>& new_args,
// Positional relay function to create quantized conv2d operator
// used by frontend FFI.
Expr MakeQnnConv2D(Expr data, Expr weight, int32_t input_zero_point, int32_t kernel_zero_point,
Array<IndexExpr> strides, Array<IndexExpr> padding, Array<IndexExpr> dilation,
double input_scale, double kernel_scale, Array<IndexExpr> strides,
Array<IndexExpr> padding, Array<IndexExpr> dilation,
int groups, IndexExpr channels, Array<IndexExpr> kernel_size,
std::string data_layout, std::string kernel_layout, std::string out_layout,
DataType out_dtype) {
@@ -457,6 +458,8 @@ Expr MakeQnnConv2D(Expr data, Expr weight, int32_t input_zero_point, int32_t ker
attrs->out_dtype = std::move(out_dtype);
attrs->input_zero_point = std::move(input_zero_point);
attrs->kernel_zero_point = std::move(kernel_zero_point);
attrs->input_scale = std::move(input_scale);
attrs->kernel_scale = std::move(kernel_scale);
static const Op& op = Op::Get("qnn.conv2d");
return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}
8 changes: 6 additions & 2 deletions src/relay/qnn/op/dense.cc
@@ -57,13 +57,17 @@ bool QnnDenseRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
}

// Positional relay function to create quantized dense operator used by frontend FFI.
Expr MakeQuantizedDense(Expr data, Expr weight, IndexExpr units, int32_t input_zero_point,
int32_t kernel_zero_point, DataType out_dtype) {
Expr MakeQuantizedDense(Expr data, Expr weight, int32_t input_zero_point,
int32_t kernel_zero_point, double input_scale,
double kernel_scale, IndexExpr units,
DataType out_dtype) {
auto attrs = make_node<QnnDenseAttrs>();
attrs->units = std::move(units);
attrs->out_dtype = out_dtype;
attrs->input_zero_point = input_zero_point;
attrs->kernel_zero_point = kernel_zero_point;
attrs->input_scale = input_scale;
attrs->kernel_scale = kernel_scale;
static const Op& op = Op::Get("qnn.dense");
return CallNode::make(op, {data, weight}, Attrs(attrs), {});
}