[WIP][QNN] Quantized fully connected #3597

Closed
wants to merge 56 commits
Commits (56)
8d9e317
[Relay] [Quantization] WIP - Common files for the qauntization work.
Jul 8, 2019
5485b58
[Relay] [Quantization] WIP - Prototyping requantize op.
Jul 8, 2019
877d834
Requantize operator implementation.
anijain2305 Jul 10, 2019
705b796
Typo and lint fixes.
anijain2305 Jul 10, 2019
6cd1328
Lint fix.
anijain2305 Jul 10, 2019
ac4349b
Doc fix.
anijain2305 Jul 10, 2019
a9fef75
Uncommenting the lint script (fixing mistake).
anijain2305 Jul 10, 2019
d9eff68
Modifying the unit tests.
anijain2305 Jul 10, 2019
abc7c4e
Moving C++ files into src/relay/qnn
anijain2305 Jul 11, 2019
275ddd0
Moving python files to python/tvm/relay/qnn. Some minor fixes.
anijain2305 Jul 11, 2019
a0ad8ca
Moving the attrs.h inside the include directory.
anijain2305 Jul 11, 2019
ff8936c
Pushing files that I forgot earlier. Changing util location.
anijain2305 Jul 11, 2019
bdca4c6
[Relay] [Quantization] WIP - Common files for the qauntization work.
Jul 8, 2019
755f934
[Relay] [Quantization] WIP - Prototyping requantize op.
Jul 8, 2019
dba71f0
Requantize operator implementation.
anijain2305 Jul 10, 2019
6016b2a
Typo and lint fixes.
anijain2305 Jul 10, 2019
d54cea8
Lint fix.
anijain2305 Jul 10, 2019
ca954e0
Doc fix.
anijain2305 Jul 10, 2019
db24f1e
Uncommenting the lint script (fixing mistake).
anijain2305 Jul 10, 2019
523e16a
Modifying the unit tests.
anijain2305 Jul 10, 2019
18bff76
Moving C++ files into src/relay/qnn
anijain2305 Jul 11, 2019
32b69df
Moving python files to python/tvm/relay/qnn. Some minor fixes.
anijain2305 Jul 11, 2019
21168ae
Moving the attrs.h inside the include directory.
anijain2305 Jul 11, 2019
4a4beec
Pushing files that I forgot earlier. Changing util location.
anijain2305 Jul 11, 2019
120c050
Incorporating comments. API change. Lint fixes.
anijain2305 Jul 15, 2019
989bbea
Modifying the GetFixedPointMultiplierShift API as per comments.
anijain2305 Jul 15, 2019
8df0ddb
Forgot the dialect change.
anijain2305 Jul 15, 2019
8d0af86
Retriggering Jenkins.
anijain2305 Jul 15, 2019
ff1b9e3
Changing rewrite to qnn_lower.
anijain2305 Jul 15, 2019
362869f
Renaming Quantize to Qnn for clarity.
anijain2305 Jul 15, 2019
36f0ed9
Remove use_int_domain.
anijain2305 Jul 17, 2019
b45c629
Working quantized fully-connected with int8 and uint8
Jul 17, 2019
419dee0
Merge branch 'requantize' into qfullyconnected
Jul 17, 2019
4958495
Incorportaing review comments.
anijain2305 Jul 19, 2019
f858a83
Adding API doc for QNN dialect.
anijain2305 Jul 19, 2019
823cc94
Move the qnn_lower pass to transform namespace.
anijain2305 Jul 19, 2019
28a9587
Moving from expr to module. Adding namespace in C++.
anijain2305 Jul 19, 2019
76476dc
Working test case for int/uint with bias_add
Jul 19, 2019
732d6ce
Minor sentence rewrites. Added qnn namespace.
anijain2305 Jul 19, 2019
fadc573
Added the API doc.
anijain2305 Jul 19, 2019
956d3de
Chanding default out_dtype to int8. Adding a test with in/out_dtype a…
anijain2305 Jul 19, 2019
7a63597
merge from upstream/requantize
Jul 19, 2019
3ffdbf8
Merge branch 'requantize' into qfullyconnected
Jul 19, 2019
d700945
Style fixes. Better error messages.
anijain2305 Jul 19, 2019
21963dc
Removing extra code.
Jul 22, 2019
29c9e06
Merge branch 'requantize' into qfullyconnected
Jul 22, 2019
d0fdd1c
Adding documentation.
anijain2305 Jul 22, 2019
33cc075
More documentation fixes.
anijain2305 Jul 22, 2019
bb38855
Adding out dtype check for requantize.
anijain2305 Jul 22, 2019
7aac28d
Adding corner case for FP32 to fixed point conversion.
anijain2305 Jul 22, 2019
635b053
Adding extra line.
anijain2305 Jul 22, 2019
222e189
Documentation fix.
anijain2305 Jul 22, 2019
6c833d5
quantized fully connected working with requantize.
Jul 22, 2019
a115c96
Adding static inline.
anijain2305 Jul 23, 2019
572a8f3
Merge branch 'master' into requantize
Jul 24, 2019
dd213b6
Merge branch 'requantize' into qfullyconnected
Jul 24, 2019
15 changes: 15 additions & 0 deletions docs/langref/relay_op.rst
@@ -198,6 +198,16 @@ This level supports backpropagation of broadcast operators. It is temporary.
tvm.relay.contrib.adaptive_avg_pool2d


**Level 11: QNN Dialect Operators**

This level supports quantized operators present in the QNN dialect.

.. autosummary::
:nosignatures:

tvm.relay.qnn.op.requantize


Level 1 Definitions
-------------------
.. autofunction:: tvm.relay.log
@@ -332,3 +342,8 @@ Level 10 Definitions
.. autofunction:: tvm.relay.nn.batch_matmul
.. autofunction:: tvm.relay.contrib.adaptive_max_pool2d
.. autofunction:: tvm.relay.contrib.adaptive_avg_pool2d


Level 11 Definitions
--------------------
.. autofunction:: tvm.relay.qnn.op.requantize
97 changes: 97 additions & 0 deletions include/tvm/relay/qnn/attrs.h
@@ -0,0 +1,97 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file tvm/relay/qnn/attrs.h
* \brief Auxiliary attributes for qnn operators.
*/

#ifndef TVM_RELAY_QNN_ATTRS_H_
#define TVM_RELAY_QNN_ATTRS_H_

#include <tvm/attrs.h>
#include <tvm/relay/base.h>
#include <string>

namespace tvm {
namespace relay {
namespace qnn {

/*! \brief Attribute for requantize operator */
struct RequantizeAttrs : public tvm::AttrsNode<RequantizeAttrs> {
double input_scale;
int32_t input_zero_point;
double output_scale;
int32_t output_zero_point;
std::string rounding;
DataType out_dtype;

TVM_DECLARE_ATTRS(RequantizeAttrs, "relay.attrs.RequantizeAttrs") {
TVM_ATTR_FIELD(input_scale)
.describe("The scale of the input tensor.");
TVM_ATTR_FIELD(input_zero_point)
.describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(output_scale)
.describe("The scale of the output tensor.");
TVM_ATTR_FIELD(output_zero_point)
.describe("The zero point of the output tensor.");
TVM_ATTR_FIELD(rounding).set_default("AWAY_FROM_ZERO")
.describe("Defines the rounding direction when the value is midway between"
"two representable values. There are two supported modes - UPWARD"
"or AWAY_FROM_ZERO. Both modes behave exactly same except at the"
"midpoints between the two representable values. At the midpoint,"
"UPWARD rounds towards positive infinity (for example -1.5 will be"
"rounded to -1). AWAY_FROM_ZERO is the standard rounding where the"
"value is rounded away from zero at midpoints (for example, -1.5"
"rounds to -2). More context can be found at following gblic manual"
"https://www.gnu.org/software/libc/manual/html_node/Rounding.html."
"FE_UPWARD corresponds to UPWARD here and FE_TONEAREST corresponds"
"to AWAY_FROM_ZERO rounding mode.");
TVM_ATTR_FIELD(out_dtype)
.set_default(NullValue<DataType>())
.describe("Output data type, set to explicit type under mixed precision setting");
}
};

/*! \brief Attributes for quantized dense operator */
struct QDenseAttrs : public tvm::AttrsNode<QDenseAttrs> {
IndexExpr units;
DataType out_dtype;
// Quantization related attributes.
int32_t input_zero_point;
int32_t kernel_zero_point;

TVM_DECLARE_ATTRS(QDenseAttrs, "relay.attrs.QDenseAttrs") {
TVM_ATTR_FIELD(units)
.describe("Number of hidden units of the dense transformation.");

TVM_ATTR_FIELD(out_dtype)
.describe("Output data type, set to explicit type under mixed precision setting");

TVM_ATTR_FIELD(input_zero_point)
.describe("The zero point of the input tensor.");
TVM_ATTR_FIELD(kernel_zero_point)
.describe("The zero point of the kernel tensor.");
}
};

} // namespace qnn
} // namespace relay
} // namespace tvm
#endif  // TVM_RELAY_QNN_ATTRS_H_
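
An editorial aside on the `rounding` attribute above: the two modes differ only at exact midpoints between representable values. A minimal plain-Python sketch of the documented behaviour (this is not the TVM implementation, only an illustration):

```python
# Illustration of the two rounding modes described in RequantizeAttrs.
# UPWARD: midpoints go toward +infinity; AWAY_FROM_ZERO: midpoints move away from zero.
import math

def round_upward(x):
    # Round half up: -1.5 -> -1, 1.5 -> 2.
    return math.floor(x + 0.5)

def round_away_from_zero(x):
    # Round half away from zero: -1.5 -> -2, 1.5 -> 2.
    return int(math.copysign(math.floor(abs(x) + 0.5), x))

for v in (-2.5, -1.5, -0.5, 0.5, 1.5, 2.5):
    print(v, round_upward(v), round_away_from_zero(v))
```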
3 changes: 3 additions & 0 deletions python/tvm/relay/__init__.py
@@ -51,6 +51,9 @@
from . import backend
from . import quantize

# Dialects
from . import qnn

from .scope_builder import ScopeBuilder

# Span
21 changes: 21 additions & 0 deletions python/tvm/relay/qnn/__init__.py
@@ -0,0 +1,21 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=wildcard-import
"""QNN dialect operators and IR passes."""
from __future__ import absolute_import as _abs
from . import op
from . import transform
22 changes: 22 additions & 0 deletions python/tvm/relay/qnn/_transform.py
@@ -0,0 +1,22 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#pylint: disable=unused-argument
"""Internal module for quantization."""
from __future__ import absolute_import
from tvm._ffi.function import _init_api

_init_api("relay.qnn._transform", __name__)
20 changes: 20 additions & 0 deletions python/tvm/relay/qnn/op/__init__.py
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=wildcard-import
"""Neural network related operators."""
from __future__ import absolute_import as _abs
from .qnn import *
20 changes: 20 additions & 0 deletions python/tvm/relay/qnn/op/_make.py
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Constructor APIs"""
from ...._ffi.function import _init_api

_init_api("relay.qnn.op._make", __name__)
104 changes: 104 additions & 0 deletions python/tvm/relay/qnn/op/qnn.py
@@ -0,0 +1,104 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#pylint: disable=invalid-name
"""QNN dialect operators."""

from __future__ import absolute_import as _abs
from . import _make

def requantize(data,
input_scale,
input_zero_point,
output_scale,
output_zero_point,
rounding="AWAY_FROM_ZERO",
out_dtype="int8"):
r"""Requantized operator.

The requantize operator converts one quantized tensor representation to
another quantized tensor representation. For the output tensor, we are
provided with output scale and zero point. The computation is as follows

Q_output = zp_output + (scale_input)/(scale_output) * (Q_input - zp_input)


Parameters
----------
data : tvm.relay.Expr
The input data to the operator.

input_scale: float
The quantization scale for the input tensor.

input_zero_point: int
The zero point of the input tensor.

output_scale: float
The quantization scale for the output tensor.

output_zero_point: int
The zero point of the output tensor.

rounding : string, optional
Defines the rounding direction when the value is midway between two
representable values.

out_dtype : str, optional
Specifies the output data type.

Returns
-------
result : tvm.relay.Expr
The computed result.
"""

return _make.requantize(data,
input_scale,
input_zero_point,
output_scale,
output_zero_point,
rounding,
out_dtype)
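
To make the formula above concrete, a small worked example in plain Python follows; the values are made up for illustration, and the real operator additionally applies the chosen rounding mode and clamps to the range of `out_dtype`:

```python
# Worked instance of Q_output = zp_output + (scale_input / scale_output) * (Q_input - zp_input).
q_input, zp_input, scale_input = 131, 127, 0.5
zp_output, scale_output = 0, 0.25

real_value = scale_input * (q_input - zp_input)                      # 0.5 * 4 = 2.0
q_output = zp_output + (scale_input / scale_output) * (q_input - zp_input)
assert q_output == real_value / scale_output + zp_output             # both give 8.0
print(int(q_output))                                                 # 8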

def quantized_dense(data, weight, input_zero_point, kernel_zero_point, units=None, out_dtype="int32"):
"""Dense operator.
Applies a linear transformation

.. math::

`Y = X * W`

Parameters
----------
data : tvm.relay.Expr
The quantized input data to the operator.

weight : tvm.relay.Expr
The quantized weight expressions.

input_zero_point : int
The zero point of the input tensor.

kernel_zero_point : int
The zero point of the kernel (weight) tensor.

units : int, optional
Number of hidden units of the dense transformation.

out_dtype : str, optional
Specifies the output data type for the mixed-precision dense; can be int32 or int16.

Returns
-------
result : tvm.relay.Expr
The computed result.
"""
return _make.dense(data, weight, units, input_zero_point, kernel_zero_point, out_dtype)
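
For reference, a minimal sketch of how the two wrappers in this file could be composed, using the API names as added in this PR; shapes, scales, and zero points are made up, and later TVM releases renamed some of these entry points, so treat this as illustrative only:

```python
# Illustrative-only sketch against the Python wrappers added in this PR.
import tvm
from tvm import relay

data = relay.var("data", shape=(2, 5), dtype="uint8")
weight = relay.var("weight", shape=(3, 5), dtype="uint8")

# Quantized dense accumulates in int32 ...
dense = relay.qnn.op.quantized_dense(data, weight,
                                     input_zero_point=127,
                                     kernel_zero_point=127,
                                     units=3,
                                     out_dtype="int32")

# ... and requantize brings the result back to an int8 representation.
out = relay.qnn.op.requantize(dense,
                              input_scale=0.25,
                              input_zero_point=0,
                              output_scale=0.5,
                              output_zero_point=-1,
                              out_dtype="int8")

func = relay.Function([data, weight], out)
```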
33 changes: 33 additions & 0 deletions python/tvm/relay/qnn/transform.py
@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name

"""QNN Dialect transformation passes."""
from __future__ import absolute_import

from . import _transform

def QnnLower():
"""
Rewrites the high-level quantized ops into low-level existing Relay ops.

Returns
-------
pass : tvm.relay.transform.Pass
The registered pass that lowers QNN ops to existing Relay ops.
"""
return _transform.QnnLower()
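
A hedged usage sketch for the lowering pass, assuming `func` is a relay.Function containing qnn ops (for example the one built in the earlier sketch); `relay.Module` and the pass-invocation style follow this PR's era of the API and may differ in other TVM versions:

```python
# Hypothetical lowering step: wrap the function in a module and run QnnLower
# so the qnn.* ops are rewritten into existing Relay ops.
mod = relay.Module.from_expr(func)
mod = relay.qnn.transform.QnnLower()(mod)
print(mod)
```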