Skip to content

Commit

Permalink
Add Adreno GPU target and topi supporting textures with dynamically allocated textures (apache#11161)
Browse files Browse the repository at this point in the history

* Add Adreno GPU target and topi supporting textures

- There are 5 compute/schedules: conv2d for NCHW/NHWC, depthwise_conv2d
  for NCHW/NHWC, average pooling
- Fix of dynamically allocated textures caching
- Add texture-nhwc scope
- Fix issue with codegen of vars having non acceptable symbols

Co-authored-by: Chris Sullivan <[email protected]>
Co-authored-by: Egor Churaev <[email protected]>

* Address comments

* Add vectorization into some adreno pool flow

Co-authored-by: Li <[email protected]>

* Fix adreno tests for running on the opencl host platform

* remove unnecessary kDriverVersion in DeviceAttrKind

* Move adreno utils functions to a separate shared file

* fix black hits

Co-authored-by: Chris Sullivan <[email protected]>
Co-authored-by: Egor Churaev <[email protected]>
Co-authored-by: Li <[email protected]>
  • Loading branch information
4 people authored and Sergey Shtin committed May 17, 2022
1 parent 4f4abac commit bc6eef2
Show file tree
Hide file tree
Showing 24 changed files with 3,903 additions and 1 deletion.
11 changes: 11 additions & 0 deletions python/tvm/_ffi/runtime_ctypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,17 @@ def driver_version(self):
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 12)

def texture_spatial_limit(self):
    """Returns limits for textures by spatial dimensions

    Returns
    -------
    limit : int or None
        Maximum size of the texture by spatial dimensions
    """
    # NOTE(review): device attribute index 12 is the same index that
    # driver_version() above queries — presumably kDriverVersion was
    # repurposed/removed in DeviceAttrKind for the texture limit; confirm
    # against the DeviceAttrKind enum in the C runtime API header.
    return self._GetDeviceAttr(self.device_type, self.device_id, 12)

def create_raw_stream(self):
"""Create a new runtime stream at the context.
Expand Down
1 change: 1 addition & 0 deletions python/tvm/relay/op/strategy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@
from . import rocm
from . import intel_graphics
from . import hexagon
from . import adreno
162 changes: 162 additions & 0 deletions python/tvm/relay/op/strategy/adreno.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Definition of adreno operator strategy."""
# pylint: disable=invalid-name,unused-argument,wildcard-import,unused-wildcard-import
from tvm import topi
from .generic import *
from .. import op as _op


@conv2d_NCHWc_strategy.register("adreno")
@conv2d_strategy.register("adreno")
def conv2d_strategy_adreno(attrs, inputs, out_type, target):
    """conv2d adreno strategy.

    Registers texture-backed (image2d) conv2d and depthwise_conv2d
    implementations for Qualcomm Adreno GPUs.  For float16 outputs two
    variants are registered: a pure fp16 one (plevel=10) and an
    fp32-accumulator one (plevel=20), letting the tuner pick between them.

    Parameters
    ----------
    attrs : tvm.ir.Attrs
        Attributes of the conv2d operator (layouts, dilation, groups, ...).
    inputs : list of tvm.te.Tensor
        [data, kernel] input tensors.
    out_type : tvm.relay.TensorType
        Output type; dtype selects the fp16 fast paths.
    target : tvm.target.Target
        Compilation target.

    Returns
    -------
    strategy : tvm.relay.op.OpStrategy
        The populated strategy.

    Raises
    ------
    ValueError
        If dilation is not positive.
    RuntimeError
        For unsupported layout combinations or general grouped convolution.
    """
    strategy = _op.OpStrategy()
    data, kernel = inputs
    dilation_h, dilation_w = attrs.get_int_tuple("dilation")
    groups = attrs.groups
    data_layout = attrs.data_layout
    kernel_layout = attrs.kernel_layout
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")

    if groups == 1:
        if (data_layout == "NCHW" and kernel_layout == "OIHW") or (
            data_layout == "NCHW4c" and kernel_layout == "OIHW4o"
        ):
            if out_type.dtype == "float16":
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.adreno.conv2d_nchwc),
                    wrap_topi_schedule(topi.adreno.schedule_conv2d_nchwc),
                    name="conv2d_nchwc.image2d",
                    plevel=10,
                )
            strategy.add_implementation(
                wrap_compute_conv2d(topi.adreno.conv2d_nchwc_acc32),
                wrap_topi_schedule(topi.adreno.schedule_conv2d_nchwc_acc32),
                name="conv2d_nchwc_tpack.image2d",
                plevel=20,
            )
        elif (data_layout == "NHWC" and kernel_layout == "HWIO") or (
            data_layout == "NHWC4c" and kernel_layout == "HWIO4o"
        ):
            if out_type.dtype == "float16":
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.adreno.conv2d_nhwc),
                    wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc),
                    name="conv2d_nhwc.image2d",
                    plevel=10,
                )
            strategy.add_implementation(
                wrap_compute_conv2d(topi.adreno.conv2d_nhwc_acc32),
                wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc_acc32),
                name="conv2d_nhwc_acc32.image2d",
                plevel=20,
            )
        else:
            # Fixed: the message previously claimed "NHWC / HWOI" while this
            # branch actually accepts HWIO-family kernel layouts.
            raise RuntimeError(
                "Layout not supported: ({}, {}) - only support NCHW / OIHW, "
                "NCHW4c / OIHW4o, NHWC / HWIO and NHWC4c / HWIO4o layouts "
                "for conv2d".format(data_layout, kernel_layout)
            )
    else:
        # cannot use is_depthwise_conv2d because it does not know about NHWC4c/HWOI4o layouts
        if data_layout == "NCHW":
            ic = data.shape[1]
        elif data_layout == "NCHW4c":
            ic = data.shape[1] * data.shape[4]
        elif data_layout == "NHWC":
            ic = data.shape[3]
        elif data_layout == "NHWC4c":
            ic = data.shape[3] * data.shape[4]
        else:
            raise RuntimeError("Unsupported depthwise_conv2d data layout {}".format(data_layout))
        if kernel_layout == "OIHW":
            oc = kernel.shape[0]
        elif kernel_layout == "OIHW4o":
            oc = kernel.shape[0] * kernel.shape[4]
        elif kernel_layout == "HWOI":
            oc = kernel.shape[2]
        elif kernel_layout == "HWOI4o":
            oc = kernel.shape[2] * kernel.shape[4]
        else:
            raise RuntimeError(
                "Unsupported depthwise_conv2d kernel layout {}".format(kernel_layout)
            )

        # Depthwise iff every group has exactly one input and one output channel.
        if ic == oc == groups:
            if (data_layout == "NCHW" and kernel_layout == "OIHW") or (
                data_layout == "NCHW4c" and kernel_layout == "OIHW4o"
            ):
                if out_type.dtype == "float16":
                    strategy.add_implementation(
                        wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nchwc),
                        wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nchwc),
                        name="depthwise_conv2d_nchwc.image2d",
                        plevel=10,
                    )
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nchwc_acc32),
                    wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nchwc_acc32),
                    name="depthwise_conv2d_nchwc_acc32.image2d",
                    plevel=20,
                )
            elif (data_layout == "NHWC" and kernel_layout == "HWOI") or (
                data_layout == "NHWC4c" and kernel_layout == "HWOI4o"
            ):
                if data.shape[-1] >= 4:
                    if out_type.dtype == "float16":
                        strategy.add_implementation(
                            wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nhwc),
                            wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nhwc),
                            name="depthwise_conv2d_nhwc.image2d",
                            plevel=10,
                        )
                    strategy.add_implementation(
                        wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nhwc_acc32),
                        wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nhwc_acc32),
                        name="depthwise_conv2d_nhwc_acc32.image2d",
                        plevel=20,
                    )
                else:
                    # Fewer than 4 channels cannot be packed into a texture
                    # vector; fall back to the generic cuda-style schedule.
                    strategy.add_implementation(
                        wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc),
                        wrap_topi_schedule(topi.cuda.schedule_depthwise_conv2d_nhwc),
                        name="depthwise_conv2d_nhwc.cuda",
                    )
            else:
                # Fixed: message now names depthwise_conv2d and the layouts
                # this branch actually supports.
                raise RuntimeError(
                    "Layout not supported: ({}, {}) - only support NCHW / OIHW, "
                    "NCHW4c / OIHW4o, NHWC / HWOI and NHWC4c / HWOI4o layouts "
                    "for depthwise_conv2d".format(data_layout, kernel_layout)
                )
        else:
            raise RuntimeError("General group convolution is not currently supported")
    return strategy


@schedule_pool.register("adreno")
def schedule_pool_adreno(attrs, outs, target):
    """Schedule pooling ops for adreno.

    NCHW4c (texture-packed) layouts go through the adreno-specific
    pooling schedule; every other layout reuses the cuda one.
    """
    layout = attrs.layout
    with target:
        if layout == "NCHW4c":
            scheduler = topi.adreno.schedule_pool
        else:
            scheduler = topi.cuda.schedule_pool
        return scheduler(outs, layout)
14 changes: 14 additions & 0 deletions python/tvm/target/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,20 @@ def stm32(series="unknown", options=None):
return Target(" ".join(["c"] + opts))


def adreno(model="unknown", options=None):
    """Returns a Qualcomm GPU target.

    Parameters
    ----------
    model: str
        The model of this device
    options : str or list of str
        Additional options
    """
    device_opts = _merge_opts(["-device=adreno", "-model=%s" % model], options)
    return Target(" ".join(["opencl"] + device_opts))


def create(target):
"""Deprecated. Use the constructor of :py:mod:`tvm.target.Target` directly."""
warnings.warn("tvm.target.create() is being deprecated. Please use tvm.target.Target() instead")
Expand Down
1 change: 1 addition & 0 deletions python/tvm/topi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
from . import hls
from . import random
from . import hexagon
from . import adreno

# error reporting
from .utils import InvalidShapeError
Expand Down
25 changes: 25 additions & 0 deletions python/tvm/topi/adreno/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# pylint: disable=redefined-builtin, wildcard-import
"""Qualcomm Adreno GPU specific declaration and schedules."""
from .conv2d_nchw import *
from .depthwise_conv2d_nchw import *
from .conv2d_nhwc import *
from .depthwise_conv2d_nhwc import *
from .pooling import *
from .conv2d_alter_op import *
Loading

0 comments on commit bc6eef2

Please sign in to comment.