[6/6] Arm(R) Ethos(TM)-U NPU codegen integration with tvmc (#8854)

* Add Arm(R) Ethos(TM)-U codegen support on tvmc
* Include `ethos-u` as a new target for tvmc
* Adds testing for the new target

Co-authored-by: Manupa Karunaratne <[email protected]>

* Add Arm(R) Ethos(TM)-U codegen support on tvmc
* move partition_for_ethosu from tvm.relay.backend.contrib.ethosu to tvm.relay.op.contrib.ethosu
* lazy load ethos-u-vela dependencies and show an appropriate error message in case the dependency is not present
* Adjust test cases

Co-authored-by: Leandro Nunes <[email protected]>

* Add Arm(R) Ethos(TM)-U codegen support on tvmc
* add missing import

Change-Id: Ieefa0ee6e86bdc09ff93fcc632ed003b5f3f3a99
Co-authored-by: Manupa Karunaratne <[email protected]>
apache · Sep 30, 2021 · dda75fe · dda75fe
1 parent 3d64783
commit dda75fe
Show file tree

Hide file tree

Showing 9 changed files with 128 additions and 62 deletions.
diff --git a/python/tvm/driver/tvmc/composite_target.py b/python/tvm/driver/tvmc/composite_target.py
@@ -25,6 +25,7 @@
 from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib
 from tvm.relay.op.contrib.ethosn import partition_for_ethosn
 from tvm.relay.op.contrib.cmsisnn import partition_for_cmsisnn
+from tvm.relay.op.contrib.ethosu import partition_for_ethosu
 from tvm.relay.op.contrib.bnns import partition_for_bnns
 from tvm.relay.op.contrib.vitis_ai import partition_for_vitis_ai
 
@@ -58,6 +59,10 @@
         "config_key": "relay.ext.ethos-n.options",
         "pass_pipeline": partition_for_ethosn,
     },
+    "ethos-u": {
+        "config_key": "relay.ext.ethosu.options",
+        "pass_pipeline": partition_for_ethosu,
+    },
     "bnns": {
         "config_key": None,
         "pass_pipeline": partition_for_bnns,

diff --git a/python/tvm/relay/backend/contrib/ethosu/__init__.py b/python/tvm/relay/backend/contrib/ethosu/__init__.py
@@ -22,4 +22,3 @@
 from . import codegen
 from . import vela_api
 from . import tir_to_cs_translator
-from .util import partition_for_ethosu
diff --git a/python/tvm/relay/backend/contrib/ethosu/util.py b/python/tvm/relay/backend/contrib/ethosu/util.py
@@ -23,13 +23,11 @@
 
 from inspect import signature
 from enum import Enum
-from typing import Union, Tuple, Dict, Optional
+from typing import Union, Tuple
 import numpy as np  # type: ignore
 
 import tvm  # type: ignore
 from tvm import relay
-from tvm.relay.build_module import bind_params_by_name  # type: ignore
-from tvm.relay.backend.contrib.ethosu import preprocess  # type: ignore
 
 
 class QConv2DArgs(Enum):
@@ -145,41 +143,6 @@ def get_accelerator_config():
     return compiler_attrs.accelerator_config
 
 
-# pylint: disable=unused-argument
-def partition_for_ethosu(
-    mod: tvm.ir.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None, **opts
-):
-    """This helper function partition the relay graph as produced by the
-    relay frontend for a given model into external functions
-    to be presented to the codegen.
-
-    Parameters
-    ----------
-    mod : tvm.ir.IRModule
-        The IRModule that gets generated from a relay frontend
-    params : Optional[Dict[str, tvm.runtime.NDArray]]
-        Constant input parameters.
-
-    Returns
-    -------
-    mod : IRModule
-        The partitioned IRModule with external global functions
-    """
-    if params:
-        mod["main"] = bind_params_by_name(mod["main"], params)
-
-    pattern = relay.op.contrib.get_pattern_table("ethosu")
-    mod = relay.transform.InferType()(mod)
-    mod = relay.transform.MergeComposite(pattern)(mod)
-    mod = relay.transform.AnnotateTarget("ethosu")(mod)
-    mod = relay.transform.MergeCompilerRegions()(mod)
-    mod = relay.transform.InferType()(mod)
-    mod = relay.transform.PartitionGraph()(mod)
-    mod = relay.transform.InferType()(mod)
-    mod = preprocess.preprocess_ext_io()(mod)
-    return mod
-
-
 def get_arg_count(func):
     """Helper function to get the number of
     arguments in a python function"""

diff --git a/python/tvm/relay/op/contrib/ethosu.py b/python/tvm/relay/op/contrib/ethosu.py
@@ -14,19 +14,51 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+# pylint: disable=ungrouped-imports
 """Arm(R) Ethos(TM)-U NPU supported operators."""
-from typing import List, Tuple, Callable
+import functools
+
+from typing import Dict, List, Tuple, Callable, Optional
 import numpy as np  # type: ignore
 
 import tvm  # type: ignore
+from tvm import relay
 from tvm.relay.expr import Constant  # type: ignore
 from tvm.relay.op.contrib.register import register_pattern_table  # type: ignore
 from tvm.relay.dataflow_pattern import wildcard, is_op, is_constant  # type: ignore
-from tvm.relay.backend.contrib.ethosu.util import QConv2DArgs  # type: ignore
-from tvm.relay.backend.contrib.ethosu.util import BiasAddArgs
-from tvm.relay.backend.contrib.ethosu.util import RequantArgs
-from tvm.relay.backend.contrib.ethosu.util import get_dim_value
-from ethosu.vela import api as vapi  # type: ignore
+from tvm.relay.build_module import bind_params_by_name  # type: ignore
+
+try:
+    # As the ethos-u-vela package is an optional TVM dependency, we want to lazy load it
+    # and check whether it is installed or not.
+    #
+    # In order to show the appropriate error messages when we try to invoke code that
+    # relies on imports from ethos-u-vela, we protect it with the decorator @requires_vela
+    # implemented below.
+    from ethosu.vela import api as vapi  # type: ignore
+    from tvm.relay.backend.contrib.ethosu import preprocess
+    from tvm.relay.backend.contrib.ethosu.util import QConv2DArgs  # type: ignore
+    from tvm.relay.backend.contrib.ethosu.util import BiasAddArgs
+    from tvm.relay.backend.contrib.ethosu.util import RequantArgs
+    from tvm.relay.backend.contrib.ethosu.util import get_dim_value
+except ImportError:
+    vapi = None
+
+
+def requires_vela(func):
+    """Decorator: calling the wrapped callable raises ImportError when the guarded
+    module-level imports (ethos-u-vela and its dependents) failed, i.e. `vapi` is None."""
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        if not vapi:  # `vapi` is set to None by the module-level `except ImportError`
+            raise ImportError(
+                "The 'ethos-u-vela' python package is required for the Arm(R) Ethos(TM)-U NPU "
+                "backend. Please install the dependency using your Python package manager."
+            ) from None  # suppress any implicit exception context in the traceback
+        return func(*args, **kwargs)  # dependency available: forward the call unchanged
+
+    return wrapper
 
 
 class TensorParams:
@@ -36,6 +68,7 @@ class TensorParams:
     for the creation of tensors in Vela.
     """
 
+    @requires_vela
     def __init__(self, tensor, layout=None, scale=None, zero_point=None):
         self.tensor = tensor
         if isinstance(tensor, Constant):
@@ -148,6 +181,7 @@ class QnnConv2DParams:
     padding_bounds = [31, 31, 32, 32]
     activation_map = {"clip": "CLIP"}
 
+    @requires_vela
     def __init__(self, func_body: tvm.relay.Function):
         activation = None
         if str(func_body.op) in self.activation_map.keys():
@@ -247,3 +281,39 @@ def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Cal
             lambda pat: QnnConv2DParams(pat).is_valid(),
         )
     ]
+
+
+# pylint: disable=unused-argument
+@requires_vela  # raises ImportError at call time when the guarded imports failed
+def partition_for_ethosu(
+    mod: tvm.ir.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None, **opts
+):
+    """Partition a relay-frontend graph into external functions for the codegen.
+
+    Parameters
+    ----------
+    mod : tvm.ir.IRModule
+        The IRModule that gets generated from a relay frontend
+    params : Optional[Dict[str, tvm.runtime.NDArray]]
+        Constant input parameters; when non-empty they are bound into mod["main"].
+    **opts
+        Accepted but not used by this function.
+
+    Returns
+    -------
+    mod : IRModule
+        The partitioned IRModule with external global functions
+    """
+    if params:
+        mod["main"] = bind_params_by_name(mod["main"], params)
+
+    pattern = relay.op.contrib.get_pattern_table("ethosu")  # table looked up by name
+    mod = relay.transform.InferType()(mod)
+    mod = relay.transform.MergeComposite(pattern)(mod)
+    mod = relay.transform.AnnotateTarget("ethosu")(mod)
+    mod = relay.transform.MergeCompilerRegions()(mod)
+    mod = relay.transform.InferType()(mod)  # InferType is re-run between the passes
+    mod = relay.transform.PartitionGraph()(mod)
+    mod = relay.transform.InferType()(mod)
+    mod = preprocess.preprocess_ext_io()(mod)  # `preprocess` is bound by the guarded imports
+    return mod
diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py
@@ -27,6 +27,7 @@
 from tvm import relay
 from tvm.relay.backend.contrib import ethosu
 from tvm.relay.backend.contrib.ethosu import util
+from tvm.relay.op.contrib.ethosu import partition_for_ethosu
 from tests.python.relay.aot.aot_test_utils import generate_ref_data
 
 from . import relay_ir_builder
@@ -139,7 +140,7 @@ def create_graph_activation(input_tensor_name, input_tensor_shape, input_tensor_
     for test_case in test_cases:
         relay_module, conv_params = test_case[0](*test_case[1])
         input_tensor, input_shape, input_dtype = test_case[1]
-        mod = ethosu.partition_for_ethosu(relay_module)
+        mod = partition_for_ethosu(relay_module)
 
         # Generate reference data
         in_min, in_max = util.get_range_for_dtype_str(input_dtype)

diff --git a/tests/python/contrib/test_ethosu/test_legalize.py b/tests/python/contrib/test_ethosu/test_legalize.py
@@ -294,7 +294,7 @@ def verify_linear(ext_func, conv2d_params):
     ]
     for test_case in test_cases:
         mod, conv_params = test_case[0](*test_case[1])
-        mod = ethosu.partition_for_ethosu(mod)
+        mod = partition_for_ethosu(mod)
         mod = legalize.LegalizeEthosUConv2D()(mod)
         verify_linear(mod["tvmgen_default_ethosu_main_0"], conv_params)
 
@@ -327,7 +327,7 @@ def create_graph_single_unsupported_ifm_layout(
 
     for test_case in test_cases:
         mod, conv_params = test_case[0](*test_case[1])
-        mod = ethosu.partition_for_ethosu(mod)
+        mod = partition_for_ethosu(mod)
         with pytest.raises(
             tvm._ffi.base.TVMError, match="EthosUCodegenError: Unsupported Layout NCHW"
         ):

diff --git a/tests/python/contrib/test_ethosu/test_networks.py b/tests/python/contrib/test_ethosu/test_networks.py
@@ -27,8 +27,9 @@
 import tvm
 import tvm.micro as micro
 from tvm import relay
-from tvm.relay.backend.contrib import ethosu
 from tvm.relay.backend.contrib.ethosu import util
+from tvm.relay.op.contrib.ethosu import partition_for_ethosu
+
 import tvm.relay.testing.tf as tf_testing
 
 from . import infra
@@ -56,7 +57,7 @@ def test_forward_mobilenet_v1(accel_type="ethos-u55-256"):
     input_data = {input_tensor: input_data}
     output_data = generate_ref_data(relay_mod, input_data)
 
-    mod = ethosu.partition_for_ethosu(relay_mod, params)
+    mod = partition_for_ethosu(relay_mod, params)
     compiled_models = infra.build_source(mod, input_data, output_data, accel_type)
     infra.verify_source(compiled_models, accel_type)
 

diff --git a/tests/python/contrib/test_ethosu/test_vela_api.py b/tests/python/contrib/test_ethosu/test_vela_api.py
@@ -347,9 +347,7 @@ def verify(test_vec, mock_obj):
         assert mock_obj.call_args[1]["block_traversal"] == test_vec["block_traversal"]
 
     def create_mock(test_vec):
-        with patch(
-            "tvm.relay.backend.contrib.ethosu.vela_api.vapi.npu_encode_weights"
-        ) as mock_npu_encode_weights:
+        with patch("ethosu.vela.api.npu_encode_weights") as mock_npu_encode_weights:
             ifm_bitdepth = np.iinfo(test_vec["ifm_dtype"]).bits
             ifm_dtype = test_vec["ifm_dtype"]
             max = np.iinfo(ifm_dtype).max
@@ -427,9 +425,7 @@ def verify(test_vec, mock_obj, packed_biases):
             assert test_vec["hw_shifts"][idx] == mock_obj.call_args_list[idx][0][2]
 
     def create_mock(test_vec):
-        with patch(
-            "tvm.relay.backend.contrib.ethosu.vela_api.vapi.npu_encode_bias"
-        ) as mock_npu_encode_bias:
+        with patch("ethosu.vela.api.npu_encode_bias") as mock_npu_encode_bias:
             mock_npu_encode_bias.return_value = bytearray(10)
             ifm_dtype = test_vec["ifm_dtype"]
             max = np.iinfo(ifm_dtype).max
@@ -507,12 +503,8 @@ def test_encode_weights(accel):
     ]
 
     def create_mock(test_vec):
-        with patch(
-            "tvm.relay.backend.contrib.ethosu.vela_api.vapi.npu_encode_weights"
-        ) as mock_enc_w:
-            with patch(
-                "tvm.relay.backend.contrib.ethosu.vela_api.vapi.npu_find_block_configs"
-            ) as mock_blk_cfg:
+        with patch("ethosu.vela.api.npu_encode_weights") as mock_enc_w:
+            with patch("ethosu.vela.api.npu_find_block_configs") as mock_blk_cfg:
                 mock_blk_cfg.return_value = [vapi.NpuShape3D(8, 8, 8)]
                 ethosu_conv2d_calls = extract_ethosu_conv2d_extern_calls(test_vec["tir_module"])
                 buffer_info = tirtocs.extract_buffer_info(

diff --git a/tests/python/driver/tvmc/test_compiler.py b/tests/python/driver/tvmc/test_compiler.py
@@ -361,6 +361,41 @@ def test_compile_tflite_module_with_external_codegen_vitis_ai(tflite_mobilenet_v
     assert os.path.exists(dumps_path)
 
 
+def test_compile_tflite_module_with_external_codegen_ethosu(
+    tmpdir_factory, tflite_mobilenet_v1_1_quant
+):
+    pytest.importorskip("tflite")  # skip (rather than fail) when optional packages are missing
+    pytest.importorskip("ethosu.vela")
+    ACCEL_TYPES = ["ethos-u55-256", "ethos-u55-128", "ethos-u55-64", "ethos-u55-32"]
+
+    output_dir = tmpdir_factory.mktemp("mlf")
+
+    tvmc_model = tvmc.load(tflite_mobilenet_v1_1_quant)
+
+    for accel_type in ACCEL_TYPES:  # compile one MLF archive per accelerator configuration
+        output_file_name = f"{output_dir}/file_{accel_type}.tar"
+
+        tvmc_package = tvmc.compiler.compile_model(
+            tvmc_model,
+            target=f"ethos-u -accelerator_config={accel_type}, c -runtime=c --system-lib --link-params -mcpu=cortex-m55 --executor=aot",
+            output_format="mlf",
+            package_path=output_file_name,
+            pass_context_configs=["tir.disable_vectorize=true"],
+        )
+
+        # check whether an MLF package was created
+        assert os.path.exists(output_file_name)
+
+        # check whether the expected number of C sources is in the tarfile
+        with tarfile.open(output_file_name) as mlf_package:
+            c_source_files = [
+                name
+                for name in mlf_package.getnames()
+                if re.match(r"\./codegen/host/src/\D+\d+\.c", name)
+            ]
+            assert len(c_source_files) == 17  # NOTE(review): presumably model-specific; confirm
+
+
 @mock.patch("tvm.relay.build")
 @mock.patch("tvm.driver.tvmc.composite_target.get_codegen_by_target")
 @mock.patch("tvm.driver.tvmc.load")