Skip to content

Commit

Permalink
Add Adreno GPU target and topi supporting textures with dynamically allocated textures (apache#11161)
Browse files Browse the repository at this point in the history

* Add Adreno GPU target and topi supporting textures

- There are 5 compute/schedules: conv2d for NCHW/NHWC, depthwise_conv2d
  for NCHW/NHWC, average pooling
- Fix of dynamically allocated textures caching
- Add texture-nhwc scope
- Fix issue with codegen of vars having non acceptable symbols

Co-authored-by: Chris Sullivan <[email protected]>
Co-authored-by: Egor Churaev <[email protected]>

* Address comments

* Add vectorization into some adreno pool flow

Co-authored-by: Li <[email protected]>

* Fix adreno tests for running on the opencl host platform

* remove unnecessary kDriverVersion in DeviceAttrKind

* Move adreno utils functions to a separate shared file

* fix black hits

Co-authored-by: Chris Sullivan <[email protected]>
Co-authored-by: Egor Churaev <[email protected]>
Co-authored-by: Li <[email protected]>
  • Loading branch information
4 people authored and Sergey Shtin committed May 17, 2022
1 parent 4f4abac commit bc6eef2
Show file tree
Hide file tree
Showing 24 changed files with 3,903 additions and 1 deletion.
11 changes: 11 additions & 0 deletions python/tvm/_ffi/runtime_ctypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,17 @@ def driver_version(self):
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 12)

def texture_spatial_limit(self):
    """Returns limits for textures by spatial dimensions

    Returns
    -------
    limit : int or None
        Maximum size of the texture by spatial dimensions
    """
    # NOTE(review): device attribute index 12 is the same index that
    # driver_version() above queries — presumably kDriverVersion was
    # repurposed/removed in DeviceAttrKind for the texture limit; confirm
    # against the DeviceAttrKind enum in the C runtime API header.
    return self._GetDeviceAttr(self.device_type, self.device_id, 12)

def create_raw_stream(self):
"""Create a new runtime stream at the context.
Expand Down
1 change: 1 addition & 0 deletions python/tvm/relay/op/strategy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@
from . import rocm
from . import intel_graphics
from . import hexagon
from . import adreno
162 changes: 162 additions & 0 deletions python/tvm/relay/op/strategy/adreno.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Definition of adreno operator strategy."""
# pylint: disable=invalid-name,unused-argument,wildcard-import,unused-wildcard-import
from tvm import topi
from .generic import *
from .. import op as _op


@conv2d_NCHWc_strategy.register("adreno")
@conv2d_strategy.register("adreno")
def conv2d_strategy_adreno(attrs, inputs, out_type, target):
    """conv2d adreno strategy.

    Registers texture-backed (image2d) conv2d and depthwise_conv2d
    implementations for Qualcomm Adreno GPUs.  For float16 outputs two
    variants are registered: a pure fp16 one (plevel=10) and an
    fp32-accumulator one (plevel=20), letting the tuner pick between them.

    Parameters
    ----------
    attrs : tvm.ir.Attrs
        Attributes of the conv2d operator (layouts, dilation, groups, ...).
    inputs : list of tvm.te.Tensor
        [data, kernel] input tensors.
    out_type : tvm.relay.TensorType
        Output type; dtype selects the fp16 fast paths.
    target : tvm.target.Target
        Compilation target.

    Returns
    -------
    strategy : tvm.relay.op.OpStrategy
        The populated strategy.

    Raises
    ------
    ValueError
        If dilation is not positive.
    RuntimeError
        For unsupported layout combinations or general grouped convolution.
    """
    strategy = _op.OpStrategy()
    data, kernel = inputs
    dilation_h, dilation_w = attrs.get_int_tuple("dilation")
    groups = attrs.groups
    data_layout = attrs.data_layout
    kernel_layout = attrs.kernel_layout
    if dilation_h < 1 or dilation_w < 1:
        raise ValueError("dilation should be positive value")

    if groups == 1:
        if (data_layout == "NCHW" and kernel_layout == "OIHW") or (
            data_layout == "NCHW4c" and kernel_layout == "OIHW4o"
        ):
            if out_type.dtype == "float16":
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.adreno.conv2d_nchwc),
                    wrap_topi_schedule(topi.adreno.schedule_conv2d_nchwc),
                    name="conv2d_nchwc.image2d",
                    plevel=10,
                )
            strategy.add_implementation(
                wrap_compute_conv2d(topi.adreno.conv2d_nchwc_acc32),
                wrap_topi_schedule(topi.adreno.schedule_conv2d_nchwc_acc32),
                name="conv2d_nchwc_tpack.image2d",
                plevel=20,
            )
        elif (data_layout == "NHWC" and kernel_layout == "HWIO") or (
            data_layout == "NHWC4c" and kernel_layout == "HWIO4o"
        ):
            if out_type.dtype == "float16":
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.adreno.conv2d_nhwc),
                    wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc),
                    name="conv2d_nhwc.image2d",
                    plevel=10,
                )
            strategy.add_implementation(
                wrap_compute_conv2d(topi.adreno.conv2d_nhwc_acc32),
                wrap_topi_schedule(topi.adreno.schedule_conv2d_nhwc_acc32),
                name="conv2d_nhwc_acc32.image2d",
                plevel=20,
            )
        else:
            # Fixed: the message previously claimed "NHWC / HWOI" while this
            # branch actually accepts HWIO-family kernel layouts.
            raise RuntimeError(
                "Layout not supported: ({}, {}) - only support NCHW / OIHW, "
                "NCHW4c / OIHW4o, NHWC / HWIO and NHWC4c / HWIO4o layouts "
                "for conv2d".format(data_layout, kernel_layout)
            )
    else:
        # cannot use is_depthwise_conv2d because it does not know about NHWC4c/HWOI4o layouts
        if data_layout == "NCHW":
            ic = data.shape[1]
        elif data_layout == "NCHW4c":
            ic = data.shape[1] * data.shape[4]
        elif data_layout == "NHWC":
            ic = data.shape[3]
        elif data_layout == "NHWC4c":
            ic = data.shape[3] * data.shape[4]
        else:
            raise RuntimeError("Unsupported depthwise_conv2d data layout {}".format(data_layout))
        if kernel_layout == "OIHW":
            oc = kernel.shape[0]
        elif kernel_layout == "OIHW4o":
            oc = kernel.shape[0] * kernel.shape[4]
        elif kernel_layout == "HWOI":
            oc = kernel.shape[2]
        elif kernel_layout == "HWOI4o":
            oc = kernel.shape[2] * kernel.shape[4]
        else:
            raise RuntimeError(
                "Unsupported depthwise_conv2d kernel layout {}".format(kernel_layout)
            )

        # Depthwise iff every group has exactly one input and one output channel.
        if ic == oc == groups:
            if (data_layout == "NCHW" and kernel_layout == "OIHW") or (
                data_layout == "NCHW4c" and kernel_layout == "OIHW4o"
            ):
                if out_type.dtype == "float16":
                    strategy.add_implementation(
                        wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nchwc),
                        wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nchwc),
                        name="depthwise_conv2d_nchwc.image2d",
                        plevel=10,
                    )
                strategy.add_implementation(
                    wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nchwc_acc32),
                    wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nchwc_acc32),
                    name="depthwise_conv2d_nchwc_acc32.image2d",
                    plevel=20,
                )
            elif (data_layout == "NHWC" and kernel_layout == "HWOI") or (
                data_layout == "NHWC4c" and kernel_layout == "HWOI4o"
            ):
                if data.shape[-1] >= 4:
                    if out_type.dtype == "float16":
                        strategy.add_implementation(
                            wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nhwc),
                            wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nhwc),
                            name="depthwise_conv2d_nhwc.image2d",
                            plevel=10,
                        )
                    strategy.add_implementation(
                        wrap_compute_conv2d(topi.adreno.depthwise_conv2d_nhwc_acc32),
                        wrap_topi_schedule(topi.adreno.schedule_depthwise_conv2d_nhwc_acc32),
                        name="depthwise_conv2d_nhwc_acc32.image2d",
                        plevel=20,
                    )
                else:
                    # Fewer than 4 channels cannot be packed into a texture
                    # vector; fall back to the generic cuda-style schedule.
                    strategy.add_implementation(
                        wrap_compute_conv2d(topi.nn.depthwise_conv2d_nhwc),
                        wrap_topi_schedule(topi.cuda.schedule_depthwise_conv2d_nhwc),
                        name="depthwise_conv2d_nhwc.cuda",
                    )
            else:
                # Fixed: message now names depthwise_conv2d and the layouts
                # this branch actually supports.
                raise RuntimeError(
                    "Layout not supported: ({}, {}) - only support NCHW / OIHW, "
                    "NCHW4c / OIHW4o, NHWC / HWOI and NHWC4c / HWOI4o layouts "
                    "for depthwise_conv2d".format(data_layout, kernel_layout)
                )
        else:
            raise RuntimeError("General group convolution is not currently supported")
    return strategy


@schedule_pool.register("adreno")
def schedule_pool_adreno(attrs, outs, target):
    """Schedule pooling ops for adreno.

    NCHW4c (texture-packed) layouts go through the adreno-specific
    pooling schedule; every other layout reuses the cuda one.
    """
    layout = attrs.layout
    with target:
        if layout == "NCHW4c":
            scheduler = topi.adreno.schedule_pool
        else:
            scheduler = topi.cuda.schedule_pool
        return scheduler(outs, layout)
14 changes: 14 additions & 0 deletions python/tvm/target/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,20 @@ def stm32(series="unknown", options=None):
return Target(" ".join(["c"] + opts))


def adreno(model="unknown", options=None):
    """Returns a Qualcomm GPU target.

    Parameters
    ----------
    model: str
        The model of this device
    options : str or list of str
        Additional options
    """
    device_opts = _merge_opts(["-device=adreno", "-model=%s" % model], options)
    return Target(" ".join(["opencl"] + device_opts))


def create(target):
"""Deprecated. Use the constructor of :py:mod:`tvm.target.Target` directly."""
warnings.warn("tvm.target.create() is being deprecated. Please use tvm.target.Target() instead")
Expand Down
1 change: 1 addition & 0 deletions python/tvm/topi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
from . import hls
from . import random
from . import hexagon
from . import adreno

# error reporting
from .utils import InvalidShapeError
Expand Down
25 changes: 25 additions & 0 deletions python/tvm/topi/adreno/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# pylint: disable=redefined-builtin, wildcard-import
"""Qualcomm Adreno GPU specific declaration and schedules."""
from .conv2d_nchw import *
from .depthwise_conv2d_nchw import *
from .conv2d_nhwc import *
from .depthwise_conv2d_nhwc import *
from .pooling import *
from .conv2d_alter_op import *
Loading

0 comments on commit bc6eef2

Please sign in to comment.