Skip to content

Commit

Permalink
[BYOC][ACL] Add maximum support for float32 (apache#6506)
Browse files Browse the repository at this point in the history
* ACL integration: add maximum support for float32.

* Added the code generation flow in arm_compute_lib.py
* Added the runtime calls in acl_runtime.cc

Change-Id: I69c5522f05a46c1dd235da5d57fe499134de0425

* Add maximum to the list of supported functions

Change-Id: Ia49087756be4c3ac92a3dc76fe03fb00de468f8d
  • Loading branch information
Giuseppe Rossini authored and Tushar Dey committed Oct 14, 2020
1 parent ae88b98 commit f382d8d
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 1 deletion.
2 changes: 2 additions & 0 deletions docs/deploy/arm_compute_lib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,8 @@ Operator support
+----------------------+-------------------------------------------------------------------------+
| reshape | fp32, uint8 |
+----------------------+-------------------------------------------------------------------------+
| maximum | fp32 |
+----------------------+-------------------------------------------------------------------------+

.. note::
A composite operator is a series of operators that map to a single Arm Compute Library operator. You can view this
Expand Down
8 changes: 8 additions & 0 deletions python/tvm/relay/op/contrib/arm_compute_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,11 @@ def global_avg_pool2d(attrs, args):
if attrs.layout != "NHWC":
return False
return True


@tvm.ir.register_op_attr("maximum", "target.arm_compute_lib")
def maximum(attrs, args):
    """Check if the external ACL codegen for maximum should be used.

    Parameters
    ----------
    attrs : object
        Operator attributes; not inspected by this check.
    args : sequence
        Operands of the call. The first two entries must each expose
        ``checked_type.dtype``.

    Returns
    -------
    bool
        True only when both operands have dtype ``"float32"``.
    """
    # Look at each operand separately: reading args[0] twice would let a
    # non-float32 second operand be accepted.
    type_a = args[0].checked_type
    type_b = args[1].checked_type
    return (type_a.dtype == "float32") and (type_b.dtype == "float32")
19 changes: 18 additions & 1 deletion src/runtime/contrib/arm_compute_lib/acl_runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
#include <arm_compute/runtime/NEON/functions/NEElementwiseOperations.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
Expand Down Expand Up @@ -139,12 +140,13 @@ class ACLRuntime : public JSONRuntimeBase {
CreateGlobalPoolingLayer(&layer_, node);
} else if ("reshape" == op_name) {
CreateReshapeLayer(&layer_, node);
} else if ("maximum" == op_name) {
CreateMaximumLayer(&layer_, node);
} else {
LOG(FATAL) << "Unsupported op: " << op_name;
}
}
}

this->layer_.function->prepare();
if (num_pools > 0) mm->populate(this->allocator_, num_pools);
}
Expand Down Expand Up @@ -401,6 +403,21 @@ class ACLRuntime : public JSONRuntimeBase {
layer->function = function;
}

/*!
 * \brief Build the ACL element-wise maximum function for a JSON graph node.
 *
 * \param layer The cached ACL layer to fill in: it receives the two input tensors,
 * the output tensor and the configured ACL function.
 * \param node The JSON representation of the operator; its first two inputs are
 * used as the operands.
 */
void CreateMaximumLayer(CachedLayer* layer, const JSONGraphNode& node) {
  // Register both operands and the result before configuring, because
  // configure() is handed the addresses of the stored tensors.
  for (int operand = 0; operand < 2; ++operand) {
    layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[operand]));
  }
  layer->outputs.push_back(MakeACLTensorFromJSONNode(node));

  auto max_function = std::make_shared<arm_compute::NEElementwiseMax>();
  max_function->configure(&layer->inputs[0], &layer->inputs[1], &layer->outputs[0]);
  layer->function = max_function;
}

/*! \brief Allow ACL functions to request auxiliary memory from TVM. */
ACLAllocator allocator_;
/*!
Expand Down
104 changes: 104 additions & 0 deletions tests/python/contrib/test_arm_compute_lib/test_maximum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Arm Compute Library integration reshape tests."""

import numpy as np

import tvm
from tvm import relay

from .infrastructure import (
skip_runtime_test,
skip_codegen_test,
build_and_run,
verify,
verify_codegen,
)
from .infrastructure import Device


def _get_model(input_shape, dtype, var_names):
"""Return a model and any parameters it may have."""
a = relay.var(next(var_names), shape=input_shape, dtype=dtype)
b = relay.var(next(var_names), shape=input_shape, dtype=dtype)
max = relay.maximum(a, b)
return max


def _get_expected_codegen(shape, dtype):
node = {
"op": "kernel",
"name": "maximum",
"inputs": [[0, 0, 0], [1, 0, 0]],
"attrs": {
"num_inputs": "2",
"num_outputs": "1",
"shape": [[list(shape)]],
"dtype": [[dtype]],
},
}

inputs = [
{"op": "input", "name": "", "attrs": {"shape": [[list(shape)]], "dtype": [[dtype]]}},
{"op": "input", "name": "", "attrs": {"shape": [[list(shape)]], "dtype": [[dtype]]}},
]
inputs.append(node)
return inputs


def test_maximum():
    """Build and run maximum with and without ACL and compare the two results.

    Returns early without running anything when ``skip_runtime_test()`` is true.
    """
    Device.load("test_config.json")

    if skip_runtime_test():
        return

    device = Device()
    np.random.seed(0)

    # One tolerance for every case. The table used to carry per-case atol/rtol
    # values that were never passed to verify(), which was misleading.
    atol = rtol = 1e-7

    for dtype, low, high in [
        ("float32", -127, 128),
        ("float32", -1, 1),
    ]:
        inputs = {
            "a": tvm.nd.array(np.random.uniform(low, high, (100, 100)).astype(dtype)),
            "b": tvm.nd.array(np.random.uniform(low, high, (100, 100)).astype(dtype)),
        }
        outputs = []
        func = _get_model(inputs["a"].shape, dtype, iter(inputs))

        # First without ACL, then with ACL enabled.
        for acl in [False, True]:
            outputs.append(build_and_run(func, inputs, 1, None, device, enable_acl=acl)[0])

        verify(outputs, atol=atol, rtol=rtol)


def test_codegen_maximum():
    """Check the codegen produced for maximum against the expected node list.

    Returns early without checking anything when ``skip_codegen_test()`` is true.
    """
    if skip_codegen_test():
        return

    shape = (100, 100)
    # An ordered tuple, not a set: set iteration order for strings can change
    # between interpreter runs, which made the variable naming non-deterministic.
    inputs = ("a", "b")
    for dtype in ["float32"]:
        args = (shape, dtype)
        func = _get_model(*args, iter(inputs))
        exp_codegen = _get_expected_codegen(*args)
        verify_codegen(func, exp_codegen, 1)


# Script entry point: run the runtime test first, then the codegen test.
if __name__ == "__main__":
    for _test in (test_maximum, test_codegen_maximum):
        _test()

0 comments on commit f382d8d

Please sign in to comment.