Add the Arm(R) Ethos(TM)-U NPU identity operator (apache#9457)
* Add the Arm(R) Ethos(TM)-U NPU identity operator

* Add the ethosu.identity operator which returns the input tensor
* Add the option to requantize the tensor
* Add legalization for reshape and strided slice
* Add a pass that puts an identity op after a no-op

Change-Id: I0adb5ca269f8529c79e0e7681ca4b5147d8f53c8

* Fix the pylint errors

Change-Id: Icc9b6507f164681a5d6b1fcff2ae4a5051d44734

* Changes in response to review comments

Change-Id: I63f30f84ad481789fc047ad8c2107f5313562f7f
ekalda authored and mehrdadh committed Dec 1, 2021
1 parent 4d5c56a commit 0e7635a
Showing 17 changed files with 1,211 additions and 13 deletions.
113 changes: 113 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -631,6 +631,116 @@ def __call__(self, *args, **kwargs):
pass


class StridedSliceRewriter(DFPatternCallback):
"""This pass brings the strided slice out of the partitioned function"""

def __init__(self):
super().__init__(require_type=True, rewrite_once=True)
self.pattern = (
wildcard().has_attr({"Composite": ethosu_patterns.StridedSliceParams.composite_name})
)(wildcard())

def callback(
self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:

slice_input = post.args[0]
params = ethosu_patterns.StridedSliceParams(post.op.body)
strided_slice = relay.op.strided_slice(
slice_input,
params.begin,
params.end,
strides=params.strides,
axes=params.axes,
slice_mode=params.slice_mode,
)
return strided_slice


@ir.transform.module_pass(opt_level=1)
class LegalizeStridedSlice:
"""This is the pass that wraps StridedSliceRewriter"""

def transform_module(
self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
) -> tvm.ir.IRModule:
for global_var, func in mod.functions.items():
func = rewrite(StridedSliceRewriter(), func)
mod.update_func(global_var, func)
return mod

def __call__(self, *args, **kwargs):
pass


class ReshapeRewriter(DFPatternCallback):
"""This pass brings the reshape out of the partitioned function"""

def __init__(self):
super().__init__(require_type=True, rewrite_once=True)
self.pattern = (
wildcard().has_attr({"Composite": ethosu_patterns.ReshapeParams.composite_name})
)(wildcard())

def callback(
self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:
reshape_input = post.args[0]
reshape_params = ethosu_patterns.ReshapeParams(post.op.body)
new_shape = reshape_params.new_shape
return relay.op.reshape(reshape_input, newshape=new_shape)


@ir.transform.module_pass(opt_level=1)
class LegalizeReshape:
"""This is the pass that wraps ReshapeRewriter"""

def transform_module(
self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
) -> tvm.ir.IRModule:
for global_var, func in mod.functions.items():
func = rewrite(ReshapeRewriter(), func)
mod.update_func(global_var, func)
return mod

def __call__(self, *args, **kwargs):
pass


class NoOpRewriter(DFPatternCallback):
"""This pass adds an idenity operator to reshape and strided slice to avoid a no op
without a consumer"""

def __init__(self):
super().__init__(require_type=True, rewrite_once=True)
self.reshape = is_op("reshape")(wildcard())
self.strided_slice = is_op("strided_slice")(wildcard())
self.pattern = self.reshape | self.strided_slice

def callback(
self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:
if pre.checked_type.dtype == "int32":
return post
return ethosu_ops.ethosu_identity(ifm=post, lut=relay.const([], dtype="int8"))


@ir.transform.module_pass(opt_level=1)
class LegalizeNoOps:
"""This is the pass that wraps RewriteNoOps"""

def transform_module(
self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
) -> tvm.ir.IRModule:
for global_var, func in mod.functions.items():
func = rewrite(NoOpRewriter(), func)
mod.update_func(global_var, func)
return mod

def __call__(self, *args, **kwargs):
pass


@ir.transform.module_pass(opt_level=1)
class LegalizeEthosU:
"""This is the pass to call graph-rewrites to perform graph transformation
@@ -655,6 +765,9 @@ def transform_module
mod = LegalizeMin()(mod)
mod = LegalizeMax()(mod)
mod = LegalizeShl()(mod)
mod = LegalizeReshape()(mod)
mod = LegalizeStridedSlice()(mod)
mod = LegalizeNoOps()(mod)
return mod

def __call__(self, *args, **kwargs):
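
For orientation, the three new passes compose in the order shown in LegalizeEthosU: reshape and strided slice are first pulled out of their composite functions, then NoOpRewriter wraps any remaining bare no-op in an ethosu_identity so it has an NPU consumer. A minimal, hedged sketch of that last step, assuming the Ethos-U contrib backend and its dependencies are importable (the input shape and dtype here are illustrative, not from this commit):

import tvm
from tvm import relay
from tvm.relay.backend.contrib.ethosu import legalize

# Illustrative input: an int8 reshape with no NPU consumer.
x = relay.var("x", shape=(1, 4, 4, 8), dtype="int8")
func = relay.Function([x], relay.reshape(x, newshape=(1, 16, 8)))
mod = tvm.IRModule.from_expr(func)

# NoOpRewriter matches the bare reshape (dtype != int32) and wraps it in
# an ethosu_identity with an empty LUT, giving the no-op a consumer.
mod = legalize.LegalizeNoOps()(mod)
print(mod)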
1 change: 1 addition & 0 deletions python/tvm/relay/backend/contrib/ethosu/op/__init__.py
@@ -20,3 +20,4 @@
from .depthwise import ethosu_depthwise_conv2d
from .pooling import ethosu_pooling
from .binary_elementwise import ethosu_binary_elementwise
from .identity import ethosu_identity
98 changes: 98 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/op/identity.py
@@ -0,0 +1,98 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=unused-argument
"""Relay identity operator for Arm(R) Ethos(TM)-U NPU"""

import tvm
from tvm.relay.op import _make
from tvm.topi.generic import schedule_injective
from tvm.relay.op.op import OpStrategy
from tvm.relay.op import strategy as _strategy

from ..te import identity_compute


@tvm.ir.register_op_attr("contrib.ethosu.identity", "FTVMCompute")
def create_ethosu_identity_compute(attrs, args, out_type):
"""Create an ethosu_identity compute op."""
ifm = args[0]
lut = args[1]
ifm_scale = attrs.ifm_scale
ifm_zero_point = attrs.ifm_zero_point
ofm_scale = attrs.ofm_scale
ofm_zero_point = attrs.ofm_zero_point
activation = attrs.activation
op = identity_compute(
ifm, lut, ifm_scale, ifm_zero_point, ofm_scale, ofm_zero_point, activation
)
return [op]


@tvm.ir.register_op_attr("contrib.ethosu.identity", "FTVMStrategy")
def identity_strategy_ethosu(attrs, inputs, out_type, target):
strategy = OpStrategy()
strategy.add_implementation(
create_ethosu_identity_compute,
_strategy.wrap_topi_schedule(schedule_injective),
name="ethosu_identity",
)
return strategy


def ethosu_identity(
ifm: tvm.relay.Expr,
lut: tvm.relay.Expr,
ifm_scale: float = 1,
ifm_zero_point: int = 0,
ofm_scale: float = 1,
ofm_zero_point: int = 0,
activation: str = "NONE",
) -> tvm.relay.Call:
"""The Identity operator that runs on the NPU.
This operator takes in a tensor of any shape and returns the same tensor,
with the data optionally requantized.
Parameters
----------
ifm : tvm.relay.Expr
The Input Feature Map tensor (IFM).
lut : tvm.relay.Expr
The look-up table values to use if activation = "LUT", "TANH" or "SIGMOID".
ifm_scale : float
The quantization scale for the Input Feature Map tensor.
ifm_zero_point : int
The quantization zero point for the Input Feature Map tensor.
ofm_scale : float
The quantization scale for the Output Feature Map tensor.
ofm_zero_point : int
The quantization zero point for the Output Feature Map tensor.
activation : str, optional
The activation function to use.
"NONE" - no activation function.
"TANH" - tanh activation function.
"SIGMOID" - sigmoid activation function.
"LUT" - use a look-up table to perform the activation function.
Returns
-------
out : tvm.relay.Call
A call to the ethosu_identity op.
"""
return _make.ethosu_identity(
ifm, lut, ifm_scale, ifm_zero_point, ofm_scale, ofm_zero_point, activation
)
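
As a usage sketch (the variable names and tensor shape are illustrative assumptions), the operator is constructed directly from Relay expressions; with the default scales and zero points it is a pure pass-through, while distinct IFM/OFM quantization parameters turn it into a requantize:

from tvm import relay
from tvm.relay.backend.contrib.ethosu.op import ethosu_identity

ifm = relay.var("ifm", shape=(1, 8, 8, 4), dtype="int8")
lut = relay.const([], dtype="int8")  # empty LUT since activation is "NONE"

# Pass-through: identical input and output quantization (the defaults).
out = ethosu_identity(ifm, lut)

# Requantize: a different OFM scale and zero point rescale the data in flight.
requant = ethosu_identity(
    ifm, lut, ifm_scale=0.5, ifm_zero_point=10, ofm_scale=0.25, ofm_zero_point=0
)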
1 change: 1 addition & 0 deletions python/tvm/relay/backend/contrib/ethosu/te/__init__.py
@@ -20,3 +20,4 @@
from .depthwise import *
from .pooling import *
from .binary_elementwise import *
from .identity import *
35 changes: 23 additions & 12 deletions python/tvm/relay/backend/contrib/ethosu/te/dma.py
@@ -67,63 +67,74 @@ def _pad(*indices):
return _pad


def read_compute(tensor: te.Tensor, layout: str, zero_point: int, scale: float) -> te.Tensor:
def read_compute(
tensor: te.Tensor, zero_point: int, scale: float, layout: Optional[str] = None
) -> te.Tensor:
"""A tensor expression which represents a read.
Parameters
----------
tensor : te.Tensor
The tensor to read.
layout : str
The layout of the tensor, either NHWC or NHCWB16.
zero_point : int
The zero point of the tensor.
scale : float
The scale of the tensor.
layout : Optional[str]
The layout of the tensor, either NHWC or NHCWB16.
Returns
-------
te.Tensor
The tensor having been read.
"""
assert layout in {"NHWC", "NHCWB16"}
read_attrs = {
"op": "ethosu_read",
"layout": layout,
"zero_point": zero_point,
"scale": scale,
}

if layout:
assert layout in {"NHWC", "NHCWB16"}
read_attrs["layout"] = layout

return te.compute(tensor.shape, lambda *i: tensor(*i), name="ethosu_read", attrs=read_attrs)


def write_compute(tensor: te.Tensor, layout: str, zero_point: int, scale: float) -> te.Tensor:
def write_compute(
tensor: te.Tensor, zero_point: int, scale: float, layout: Optional[str] = None
) -> te.Tensor:
"""A tensor expression which represents a write.
Parameters
----------
tensor : te.Tensor
The tensor to write.
layout : str
The layout of the tensor, either NHWC or NHCWB16.
zero_point : int
The zero point of the tensor.
scale : float
The scale of the tensor.
layout : Optional[str]
The layout of the tensor, either NHWC or NHCWB16.
Returns
-------
te.Tensor
The tensor having been written.
"""
assert layout in {"NHWC", "NHCWB16"}

write_attrs = {
"op": "ethosu_write",
"layout": layout,
"zero_point": zero_point,
"scale": scale,
}

if layout:
assert layout in {"NHWC", "NHCWB16"}
write_attrs["layout"] = layout

return te.compute(
tensor.shape,
lambda *i: tensor(*i),
@@ -278,7 +289,7 @@ def dma_ifm_compute(
The dma-ed IFM tensor.
"""
read_ifm = read_compute(ifm, layout, zero_point, scale)
read_ifm = read_compute(ifm, zero_point, scale, layout=layout)
convert_to_nhwc_ifm = convert_to_nhwc_compute(read_ifm, layout, channels)
return pad_compute(convert_to_nhwc_ifm, padding)

@@ -308,4 +319,4 @@ def dma_ofm_compute(
"""
convert_to_nhcwb16_ofm = convert_to_nhcwb16_compute(ofm, layout, channels)
return write_compute(convert_to_nhcwb16_ofm, layout, zero_point, scale)
return write_compute(convert_to_nhcwb16_ofm, zero_point, scale, layout=layout)
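
To illustrate the signature change (a hedged sketch; the placeholder tensor is a stand-in, not from this commit): layout is now an optional keyword argument, so the identity operator's read and write, which perform no layout conversion, can simply omit it, while the existing DMA paths keep passing it explicitly:

from tvm import te
from tvm.relay.backend.contrib.ethosu.te.dma import read_compute, write_compute

ifm = te.placeholder((1, 8, 8, 4), dtype="int8", name="ifm")

# Identity-style read/write: no layout given, so no "layout" key is added
# to the compute attrs and the NHWC/NHCWB16 assertion is skipped.
read = read_compute(ifm, zero_point=0, scale=1.0)
written = write_compute(read, zero_point=0, scale=1.0)

# Convolution-style DMA still passes the layout explicitly.
read_nhwc = read_compute(ifm, zero_point=0, scale=1.0, layout="NHWC")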