diff --git a/CMakeLists.txt b/CMakeLists.txt index aaddebdfe3c57..bdb38e3fb8058 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,8 @@ tvm_option(USE_CPP_RPC "Build CPP RPC" OFF) tvm_option(USE_TFLITE "Build with tflite support" OFF) tvm_option(USE_TENSORFLOW_PATH "TensorFlow root path when use TFLite" none) tvm_option(USE_COREML "Build with coreml support" OFF) +tvm_option(USE_ACL "Build with Arm Compute Library" OFF) +tvm_option(USE_ACL_GRAPH_RUNTIME "Build with Arm Compute Library graph runtime" OFF) if(USE_CPP_RPC AND UNIX) message(FATAL_ERROR "USE_CPP_RPC is only supported with WIN32. Use the Makefile for non-Windows.") @@ -327,6 +329,7 @@ include(cmake/modules/contrib/HybridDump.cmake) include(cmake/modules/contrib/TFLite.cmake) include(cmake/modules/contrib/TF_TVMDSOOP.cmake) include(cmake/modules/contrib/CoreML.cmake) +include(cmake/modules/contrib/ACL.cmake) include(CheckCXXCompilerFlag) if(NOT MSVC) diff --git a/cmake/config.cmake b/cmake/config.cmake index 1b196922ca059..e59690da2c04e 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -184,6 +184,18 @@ set(USE_SORT ON) # Whether use MKL-DNN (DNNL) codegen set(USE_DNNL_CODEGEN OFF) +# Whether to use ACL (Arm Compute Library) codegen +# We provide 2 separate flags since we cannot build the ACL runtime on x86. +# This is useful for cases where you want to cross-compile a relay graph +# on x86 then run on AArch. +# +# USE_ACL - Support for compiling a relay graph offloading supported +# operators to ACL. OFF/ON +# USE_ACL_GRAPH_RUNTIME - Run ACL annotated functions via the ACL +# runtime. OFF/ON/"path/to/ACL" +set(USE_ACL OFF) +set(USE_ACL_GRAPH_RUNTIME OFF) + # Build ANTLR parser for Relay text format # Possible values: # - ON: enable ANTLR by searching default locations (cmake find_program for antlr4 and /usr/local for jar) diff --git a/cmake/modules/contrib/ACL.cmake b/cmake/modules/contrib/ACL.cmake new file mode 100644 index 0000000000000..94db11d1fdf05 --- /dev/null +++ b/cmake/modules/contrib/ACL.cmake @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# We separate the codegen and runtime build since ACL can only be built +# for AArch. In the world where we take the cross compilation approach, +# which is common with arm devices, we need to be able to cross-compile +# a relay graph on x86 for AArch and then run the graph on AArch. +if(USE_ACL) + file(GLOB ACL_RELAY_CONTRIB_SRC src/relay/backend/contrib/acl/*.cc) + file(GLOB ACL_RUNTIME_MODULE src/runtime/contrib/acl/acl_runtime.cc) + list(APPEND COMPILER_SRCS ${ACL_RELAY_CONTRIB_SRC}) + list(APPEND COMPILER_SRCS ${ACL_RUNTIME_MODULE}) + message(STATUS "Build with ACL support...") +endif() + +if(USE_ACL_GRAPH_RUNTIME) + set(ACL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/acl) + # Detect custom ACL path. 
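+ # If USE_ACL_GRAPH_RUNTIME holds a value other than ON, treat it as the root of a
+ # pre-built ACL tree; otherwise ACL is expected under <tvm source root>/acl.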
+ if (NOT USE_ACL_GRAPH_RUNTIME STREQUAL "ON") + set(ACL_PATH ${USE_ACL_GRAPH_RUNTIME}) + endif() + + file(GLOB ACL_CONTRIB_SRC src/runtime/contrib/acl/*) + file(GLOB ACL_API src/relay/backend/contrib/acl/acl_api.cc) + + set(ACL_INCLUDE_DIRS ${ACL_PATH}/include ${ACL_PATH}) + include_directories(${ACL_INCLUDE_DIRS}) + + find_library(EXTERN_ACL_COMPUTE_LIB + NAMES arm_compute libarm_compute + HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build" + ) + find_library(EXTERN_ACL_COMPUTE_CORE_LIB + NAMES arm_compute_core libarm_compute_core + HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build" + ) + find_library(EXTERN_ACL_COMPUTE_GRAPH_LIB + NAMES arm_compute_graph libarm_compute_graph + HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build" + ) + + list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_LIB}) + list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_CORE_LIB}) + list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_GRAPH_LIB}) + list(APPEND RUNTIME_SRCS ${ACL_CONTRIB_SRC}) + list(APPEND RUNTIME_SRCS ${ACL_API}) + message(STATUS "Build with ACL graph runtime support: " + ${EXTERN_ACL_COMPUTE_LIB} ", \n" + ${EXTERN_ACL_COMPUTE_CORE_LIB} ", \n" + ${EXTERN_ACL_COMPUTE_GRAPH_LIB}) + + # Set flag to detect ACL graph runtime support. + add_definitions(-DTVM_GRAPH_RUNTIME_ACL) +endif() diff --git a/python/tvm/relay/op/contrib/__init__.py b/python/tvm/relay/op/contrib/__init__.py index 0e1b4b024a5aa..fad7183d92987 100644 --- a/python/tvm/relay/op/contrib/__init__.py +++ b/python/tvm/relay/op/contrib/__init__.py @@ -18,5 +18,6 @@ """Contrib modules.""" from .register import get_pattern_table, register_pattern_table +from .acl import * from .dnnl import * from .coreml import * diff --git a/python/tvm/relay/op/contrib/acl.py b/python/tvm/relay/op/contrib/acl.py new file mode 100644 index 0000000000000..8207575460450 --- /dev/null +++ b/python/tvm/relay/op/contrib/acl.py @@ -0,0 +1,125 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument +"""ACL library supported operators.""" +import tvm +from tvm.relay import transform +from tvm.relay.build_module import bind_params_by_name + +from ...dataflow_pattern import wildcard, is_op, is_constant +from .register import register_pattern_table + + +def is_acl_runtime_present(): + """Check if the ACL graph runtime is present. + + Returns + ------- + ret: bool + True if present, False if not. + """ + return tvm.get_global_func("relay.op.is_acl_runtime_enabled", True) + + +def partition_for_acl(mod, params=None): + """Partition the graph greedily offloading supported + operators to ACL. + + Parameters + ---------- + mod : Module + The module to run passes on. + params : dict[str, NDArray] + Constant input parameters. 
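+        The constants are bound to the main function before partitioning.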
+ + Returns + ------- + ret : annotated and partitioned module. + """ + if params: + mod['main'] = bind_params_by_name(mod['main'], params) + + seq = tvm.transform.Sequential([transform.MergeComposite(pattern_table()), + transform.AnnotateTarget('acl'), + transform.PartitionGraph()]) + + return seq(mod) + + +@register_pattern_table("acl") +def pattern_table(): + """Get the ACL pattern table.""" + + def conv_pattern(): + """Create a convolution pattern. + + Returns + ------- + pattern : dataflow_pattern.AltPattern + Denotes the convolution pattern. + """ + pattern = is_op('nn.pad')(wildcard()) | wildcard() + pattern = is_op('nn.conv2d')(pattern, is_constant()) + pattern = pattern.optional(lambda x: is_op('nn.bias_add')(x, is_constant())) + pattern = pattern.optional(is_op('nn.relu')) + return pattern + + def check_conv(extract): + """Check conv pattern is supported by ACL.""" + call = extract + while call.op.name != "nn.conv2d": + call = call.args[0] + return conv2d(call.attrs, call.args) + + return [('acl.conv2d', conv_pattern(), check_conv)] + + +def _register_external_op_helper(op_name, supported=True): + @tvm.ir.register_op_attr(op_name, "target.acl") + def _func_wrapper(attrs, args): + return supported + + return _func_wrapper + + +_register_external_op_helper("reshape") + + +@tvm.ir.register_op_attr("nn.conv2d", "target.acl") +def conv2d(attrs, args): + """Check if the external ACL codegen for conv2d should be used.""" + + # ACL only supports group size of 1 + if attrs.groups != 1: + return False + + # ACL only supports NHWC layout + if attrs.data_layout != "NHWC": + return False + + return True + + +@tvm.ir.register_op_attr("nn.max_pool2d", "target.acl") +def max_pool2d(attrs, args): + """Check if the external ACL codegen for maxpool2d should be used.""" + + # ACL only supports NHWC layout + if attrs.layout != "NHWC": + return False + + return True diff --git a/src/relay/backend/contrib/acl/README.md b/src/relay/backend/contrib/acl/README.md new file mode 100644 index 0000000000000..111f64c2c1f28 --- /dev/null +++ b/src/relay/backend/contrib/acl/README.md @@ -0,0 +1,111 @@ + + +# Relay Arm® Compute Library Integration +Arm Compute Library (ACL) is an open source project that provides accelerated kernels for Arm CPU's +and GPU's. Currently the integration offloads operators to ACL to use hand-crafted assembler +routines in the library. By offloading select operators from a relay graph to ACL we can achieve +a performance boost on such devices. + +## Building with ACL support +The current implementation has two separate build options in cmake. The reason for this split is +because ACL cannot be used on an x86 machine. However, we still want to be able compile an ACL +runtime module on an x86 machine. + +* USE_ACL=ON/OFF - Enabling this flag will add support for compiling an ACL runtime module. +* USE_GRAPH_RUNTIME_ACL=ON/OFF/path-to-acl - Enabling this flag will allow the graph runtime to +compute the ACL offloaded functions. + +These flags can be used in different scenarios depending on your setup. For example, if you want +to compile ACL on an x86 machine and then run the module on a remote Arm device via RPC, you will +need to use USE_ACL=ON on the x86 machine and USE_GRAPH_RUNTIME_ACL=ON on the remote AArch64 +device. +## Usage +_Note:_ this may not stay up-to-date with changes to the API. +1. Create a relay graph. This may be a single operator or a whole graph. The intention is that any +relay graph can be input. 
The ACL integration will only pick supported operators to be offloaded +whilst the rest will be computed via TVM. (For this example we will use a single +max_pool2d operator). + ``` + import tvm + from tvm import relay + + data_type = "float32" + data_shape = (1, 14, 14, 512) + strides = (2, 2) + padding = (0, 0, 0, 0) + pool_size = (2, 2) + layout = "NHWC" + output_shape = (1, 7, 7, 512) + + data = relay.var('data', shape=data_shape, dtype=data_type) + out = relay.nn.max_pool2d(data, pool_size=pool_size, strides=strides, + layout=layout, padding=padding) + module = tvm.IRModule.from_expr(out) + ``` +2. Annotate and partition the graph for ACL. + ``` + module = relay.transform.AnnotateTarget("acl")(module) + module = relay.transform.PartitionGraph()(module) + ``` +3. Build the Relay graph. + ``` + target = "llvm -target=aarch64-linux-gnu -mattr=+neon" + with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): + json, lib, params = relay.build(module, target=target) + ``` +4. Export the module. + ``` + lib_path = '~/lib_acl.so' + cross_compile = 'aarch64-linux-gnu-c++' + lib.export_library(lib_path, cc=cross_compile) + ``` + 5. Run Inference. This must be on an Arm device. If compiling on x86 device and running on aarch64 + consider using the RPC mechanism. + ``` + tvm.runtime.load_module('lib_acl.so') + gen_module = tvm.contrib.graph_runtime.create(json, lib, ctx) + + d_data = np.random.uniform(0, 1, data_shape).astype(data_type) + map_inputs = {'data': d_data} + gen_module.map_inputs(**map_inputs) + gen_module.run() + ``` + +## More examples +The example above only shows a basic example of how ACL can be used for offloading a single +Maxpool2D. If you would like to see more examples for each implemented operator and for +networks refer to the tests: `tests/python/contrib/test_acl`. Here you can modify +`infrastructure.py` to use the remote device you have setup. + +## Adding a new operator +Adding a new operator requires changes to a series of places. This section will give a hint on +what needs to be changed and where, it will not however dive into the complexities for an +individual operator. This is left to the developer. + +There are a series of files we need to make changes to: +* `python/relay/op/contrib/acl.py` In this file we define the operators we wish to offload using the +`op.register` decorator. This will mean the annotation pass recognizes this operator as ACL +offloadable. +* `src/relay/backend/contrib/acl/codegen_acl.h` Implement `Make[OpName]` method. This is where we +declare how the operator should be represented by JSON. This will be used to create the ACL module. +* `src/runtime/contrib/acl/acl_kernel.h` Implement `Create[OpName]Layer` method. This is where we +define how the JSON representation can be used to create an ACL function. We simply define how to +translate from the JSON representation to ACL API. +* `tests/python/contrib/test_acl` Add unit tests for the given operator. diff --git a/src/relay/backend/contrib/acl/acl_api.cc b/src/relay/backend/contrib/acl/acl_api.cc new file mode 100644 index 0000000000000..5e3aa9c5679ef --- /dev/null +++ b/src/relay/backend/contrib/acl/acl_api.cc @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/acl/acl_api.cc + * \brief A common JSON interface between relay and the ACL runtime module. + */ + +#include "acl_api.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace acl { + +std::pair> DeserializeSubgraph( + std::string* serialized_function) { + dmlc::MemoryStringStream mstrm(serialized_function); + dmlc::Stream* strm = &mstrm; + std::string serialized_json; + strm->Read(&serialized_json); + std::istringstream is(serialized_json); + dmlc::JSONReader reader(&is); + JSONSubGraph function; + function.Load(&reader); + std::vector constants; + size_t const_count; + strm->Read(&const_count); + for (size_t i = 0; i < const_count; i++) { + runtime::NDArray temp; + temp.Load(strm); + constants.push_back(temp); + } + return std::make_pair(function, constants); +} + +std::string SerializeSubgraph(const JSONSubGraph& subgraph, + const std::vector& constants) { + std::ostringstream os; + dmlc::JSONWriter writer(&os); + subgraph.Save(&writer); + std::string serialized_subgraph; + dmlc::MemoryStringStream mstrm(&serialized_subgraph); + dmlc::Stream* strm = &mstrm; + strm->Write(os.str()); + strm->Write(constants.size()); + for (const auto& it : constants) { + it.Save(strm); + } + return serialized_subgraph; +} + +} // namespace acl +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/contrib/acl/acl_api.h b/src/relay/backend/contrib/acl/acl_api.h new file mode 100644 index 0000000000000..60ea03e5b3fe4 --- /dev/null +++ b/src/relay/backend/contrib/acl/acl_api.h @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/acl/acl_api.h + * \brief A common JSON interface between relay and the ACL runtime module. 
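+ * \note Each serialized subgraph currently describes a single operator (JSONOp)
+ * together with its constant tensors.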
+ */ + +#ifndef TVM_RELAY_BACKEND_CONTRIB_ACL_ACL_API_H_ +#define TVM_RELAY_BACKEND_CONTRIB_ACL_ACL_API_H_ + +#include +#include +#include + +#include +#include +#include +#include + +namespace tvm { +namespace relay { +namespace contrib { +namespace acl { + +DMLC_JSON_ENABLE_ANY(std::vector, IntVector); +DMLC_JSON_ENABLE_ANY(int, Int); +DMLC_JSON_ENABLE_ANY(size_t, Size_t); +DMLC_JSON_ENABLE_ANY(std::string, String); + +/*! + * JSON interface for ACL tensor. + */ +class JSONTensor { + public: + JSONTensor() = default; + explicit JSONTensor(std::vector shape) : type("var"), shape(std::move(shape)) {} + + JSONTensor(std::string type, std::vector shape) + : type(std::move(type)), shape(std::move(shape)) {} + + void Save(dmlc::JSONWriter* writer) const { + writer->BeginObject(); + writer->WriteObjectKeyValue("type", type); + writer->WriteObjectKeyValue("shape", shape); + writer->EndObject(); + } + + void Load(dmlc::JSONReader* reader) { + dmlc::JSONObjectReadHelper helper; + helper.DeclareField("type", &type); + helper.DeclareField("shape", &shape); + helper.ReadAllFields(reader); + } + + /*! \brief The type of the tensor var/const. */ + std::string type; + /*! \brief The shape of the tensor. */ + std::vector shape; +}; + +/*! + * JSON interface for an ACL operator. + */ +class JSONOp { + public: + JSONOp() = default; + explicit JSONOp(std::string name) : name(std::move(name)) {} + + void Save(dmlc::JSONWriter* writer) const { + auto op_attrs = attrs; + op_attrs["num_inputs"] = dmlc::any(inputs.size()); + op_attrs["num_outputs"] = dmlc::any(outputs.size()); + writer->BeginObject(); + writer->WriteObjectKeyValue("name", name); + writer->WriteObjectKeyValue("inputs", inputs); + writer->WriteObjectKeyValue("outputs", outputs); + writer->WriteObjectKeyValue("attrs", op_attrs); + writer->EndObject(); + } + + void Load(dmlc::JSONReader* reader) { + dmlc::JSONObjectReadHelper helper; + helper.DeclareField("name", &name); + helper.DeclareField("inputs", &inputs); + helper.DeclareField("outputs", &outputs); + helper.DeclareField("attrs", &attrs); + helper.ReadAllFields(reader); + } + + /*! The name of the operator. */ + std::string name; + /*! The required variable inputs to the operator. */ + std::vector inputs; + /*! The required outputs to the operator. */ + std::vector outputs; + /*! The attributes of the operator e.g. padding, strides, etc. */ + std::unordered_map attrs; +}; + +/*! + * JSON interface for a series of ACL ops. + */ +class JSONSubGraph { + public: + JSONSubGraph() = default; + explicit JSONSubGraph(JSONOp op) : op(std::move(op)) {} + + void Save(dmlc::JSONWriter* writer) const { + writer->BeginObject(); + writer->WriteObjectKeyValue("node", op); + writer->EndObject(); + } + + void Load(dmlc::JSONReader* reader) { + dmlc::JSONObjectReadHelper helper; + helper.DeclareField("node", &op); + helper.ReadAllFields(reader); + } + + /*! \brief JSON op to be serialized. */ + JSONOp op; +}; + +/*! + * \brief Deserialize a function (or subgraph). The function is serialized in the + * format: Serialized JSON (using dmlc::JSONWriter), number of constants, serialized + * NDArray constants. + * + * \param serialized_function Pointer to a serialized function (or subgraph). + * \return A pair consisting of deserialized json subgraph object and deserialized + * NDArray. + */ +std::pair> DeserializeSubgraph( + std::string* serialized_function); + +/*! + * \brief Serialize a single subgraph which can be saved to disk. 
+ * + * A subgraph is serialized so that the output is as follows: + * - Serialized JSON. + * - Number of constant tensors. + * - Serialized constant tensors. + * + * \param subgraph JSON subgraph representation. + * \constants Serialized JSON constants. + */ +std::string SerializeSubgraph(const JSONSubGraph& subgraph, + const std::vector& constants); + +} // namespace acl +} // namespace contrib +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_BACKEND_CONTRIB_ACL_ACL_API_H_ diff --git a/src/relay/backend/contrib/acl/codegen.cc b/src/relay/backend/contrib/acl/codegen.cc new file mode 100644 index 0000000000000..1c61a6b09fce4 --- /dev/null +++ b/src/relay/backend/contrib/acl/codegen.cc @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/acl/codegen_acl.cc + * \brief Implementation of the Relay -> ACL JSON schema compiler. + */ +#include +#include + +#include "../../utils.h" +#include "codegen_acl.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace acl { + +void CodegenACL::VisitLeaf(const Expr& expr) { + if (expr->IsInstance()) { + const auto* constant_node = expr.as(); + this->constants_.push_back(constant_node->data); + } else if (!expr->IsInstance()) { + // Don't enter functions + MixedModeVisitor::VisitLeaf(expr); + } +} + +void CodegenACL::VisitExpr_(const CallNode* node) { + Call call = GetRef(node); + if (this->layer_table_.find(call) == this->layer_table_.end()) { + for (const auto& arg : call->args) { + this->VisitExpr(arg); + } + // Determine call -> ACL mapping + JSONOp layer; + if (IsAclFunc(node, "acl.conv2d") || backend::IsOp(node, "nn.conv2d")) { + layer = MakeConvolutionOp(call); + } else if (backend::IsOp(node, "nn.max_pool2d")) { + layer = MakeMaxPool2DOp(call); + } else if (backend::IsOp(node, "reshape")) { + layer = MakeReshapeOp(call); + } else { + LOG(FATAL) << "Unsupported op: " << AsText(node->op, false); + } + this->layer_table_[call] = layer; + } +} + +runtime::Module CodegenACL::CreateRuntimeModule(const ObjectRef& ref) { + std::vector> serialized_functions; + if (ref->IsInstance()) { + IRModule mod; + Function func = Downcast(ref); + auto name_node = func->GetAttr(tvm::attr::kGlobalSymbol); + CHECK(name_node.defined()) << "Failed to retrieve external symbol"; + mod->Add(GlobalVar(name_node.value()), func); + mod = this->PreProcessModule(mod); + for (const auto& it : mod->functions) { + this->SerializeFunction(it.second, &serialized_functions); + } + } else { + LOG(FATAL) << "The input ref is expected to be a Relay function."; + } + std::string data; + dmlc::MemoryStringStream fs(&data); + dmlc::SeekStream* strm = &fs; + strm->Write(serialized_functions.size()); + for (const auto& it : serialized_functions) { + 
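+    // Each entry is the function's external symbol followed by its serialized
+    // JSON subgraph and constant tensors.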
strm->Write(it.first); + strm->Write(it.second); + } + strm->Seek(0); + std::string make_acl_module = "runtime.module.loadbinary_acl"; + auto pf = tvm::runtime::Registry::Get(make_acl_module); + if (pf) { + return (*pf)(strm); + } else { + return runtime::Module(); + } +} + +JSONSubGraph CodegenACL::CreateJSONSubgraph(const Function& func) { + Expr body = func->body; + this->layer_table_.clear(); + this->constants_.clear(); + this->VisitExpr(body); + std::vector ops; + for (const auto& it : this->layer_table_) { + ops.push_back(it.second); + } + CHECK_EQ(layer_table_.size(), 1) << "ACL codegen expects only a single op per function."; + return JSONSubGraph(ops[0]); +} + +void CodegenACL::SerializeFunction( + const ObjectRef& ref, std::vector>* serialized_functions) { + Function func = Downcast(ref); + JSONSubGraph subgraph = this->CreateJSONSubgraph(func); + const auto name_node = func->GetAttr(tvm::attr::kGlobalSymbol); + CHECK(name_node != "") << "Fail to retrieve external symbol"; + std::string serialized_pair = SerializeSubgraph(subgraph, this->constants_); + serialized_functions->emplace_back(name_node.value(), serialized_pair); +} + +IRModule CodegenACL::PreProcessModule(const IRModule& mod) { + IRModule preprocessed_module; + tvm::Map> desired_layouts = { + {"nn.conv2d", {String("NHWC"), String("OHWI")}}}; + preprocessed_module = transform::ConvertLayout(desired_layouts)(mod); + preprocessed_module = transform::FoldConstant()(preprocessed_module); + return preprocessed_module; +} + +JSONOp CodegenACL::MakeConvolutionOp(const Call& call) { + JSONOp op("conv2d"); + const CallNode* pad = nullptr; + const CallNode* conv; + const CallNode* bias = nullptr; + bool has_activation = false; + if (call->op->IsInstance()) { + Expr composite_conv = GetCompositeExpr(call); + // Unpack composite function + const auto* current_call = composite_conv.as(); + if (backend::IsOp(current_call, "nn.relu")) { + has_activation = true; + current_call = current_call->args[0].as(); + } + if (backend::IsOp(current_call, "nn.bias_add")) { + bias = current_call; + current_call = current_call->args[0].as(); + } + CHECK(backend::IsOp(current_call, "nn.conv2d")); + conv = current_call; + if (!current_call->args.empty() && current_call->args[0]->IsInstance()) { + current_call = current_call->args[0].as(); + if (backend::IsOp(current_call, "nn.pad")) { + pad = current_call; + } + } + } else { + conv = call.as(); + } + const auto* conv_attr = conv->attrs.as(); + CHECK(conv_attr); + CHECK(conv_attr->kernel_layout == "OHWI") + << "Kernel layout must be OHWI, has the module been pre-processed correctly?"; + if (pad) { + op.inputs.push_back(MakeJSONTensor(pad->args[0])); + } else { + op.inputs.push_back(MakeJSONTensor(conv->args[0])); + } + op.inputs.push_back(MakeJSONConstTensor(conv->args[1])); + op.outputs.push_back(MakeJSONTensor(GetRef(conv))); + if (bias) { + op.inputs.push_back(MakeJSONConstTensor(bias->args[1])); + } + // It seems there are two different methods for padding a convolution: + // - using nn.pad operator before convolution + // - using conv2d_attrs to add padding + // + // Cover both cases here. 
+ std::vector padding; + if (pad) { + const auto* pad_attr = pad->attrs.as(); + CHECK(pad_attr); + padding = GetPadVector(pad_attr->pad_width); + } else { + padding = GetPadVector(conv_attr->padding); + } + op.attrs["padding"] = padding; + op.attrs["groups"] = conv_attr->groups; + op.attrs["strides"] = ToVector(conv_attr->strides); + if (has_activation) op.attrs["activation_type"] = std::string("relu"); + return op; +} + +JSONOp CodegenACL::MakeMaxPool2DOp(const Call& call) { + JSONOp op("max_pool"); + const auto* attr = call->attrs.as(); + CHECK(attr); + op.inputs.push_back(MakeJSONTensor(call->args[0])); + op.outputs.push_back(MakeJSONTensor(call)); + op.attrs["padding"] = GetPadVector(attr->padding); + op.attrs["strides"] = ToVector(attr->strides); + op.attrs["pooling_type"] = std::string("max"); + op.attrs["pool_size"] = ToVector(attr->pool_size); + return op; +} + +JSONOp CodegenACL::MakeReshapeOp(const Call& call) { + JSONOp op("reshape"); + const auto* attr = call->attrs.as(); + CHECK(attr); + op.inputs.push_back(MakeJSONTensor(call->args[0])); + op.outputs.push_back(MakeJSONTensor(call)); + return op; +} + +JSONTensor CodegenACL::MakeJSONTensor(const Expr& expr) { + const auto* ttnode = expr->checked_type().as(); + CHECK(ttnode); + std::vector shape = ToVector(ttnode->shape); + return JSONTensor("var", shape); +} + +JSONTensor CodegenACL::MakeJSONConstTensor(const Expr& expr) { + const auto* ttnode = expr->checked_type().as(); + CHECK(ttnode); + std::vector shape = ToVector(ttnode->shape); + VisitExpr(expr); + return JSONTensor("const", shape); +} + +bool CodegenACL::IsAclFunc(const CallNode* call, const std::string& op_name) const { + if (call->op->IsInstance()) { + Function func = Downcast(call->op); + CHECK(func.defined()); + auto name_node = func->GetAttr(attr::kComposite); + return name_node.value() == op_name; + } + return false; +} + +Expr CodegenACL::GetCompositeExpr(const Call& call) { + Function composite_function = Downcast(call->op); + Expr composite_expr = composite_function->body; + CHECK(composite_expr->IsInstance()); + return composite_expr; +} + +std::vector CodegenACL::ToVector(const Array& array) { + std::vector stl_vector; + for (auto it : array) { + const auto* val = it.as(); + CHECK(val); + stl_vector.push_back(val->value); + } + return stl_vector; +} + +std::vector CodegenACL::GetPadVector(const Array>& pad) { + // TVM nn.pad: top, bottom, left, right -> ACL Pad: left, right, top, bottom + auto acl_pad = {pad[2][0], pad[2][1], pad[1][0], pad[1][1]}; + return ToVector(acl_pad); +} + +std::vector CodegenACL::GetPadVector(const Array& pad) { + Array acl_pad; + switch (pad.size()) { + case 1: + acl_pad = {pad[0], pad[0], pad[0], pad[0]}; + break; + case 2: + // TVM Pad: height, width -> ACL Pad: left, right, top, bottom + acl_pad = {pad[1], pad[1], pad[0], pad[0]}; + break; + case 4: + // TVM Pad: top, left, bottom, right -> ACL Pad: left, right, top, bottom + acl_pad = {pad[1], pad[3], pad[0], pad[2]}; + break; + default: + LOG(FATAL) << "Unsupported padding dimensions"; + } + return ToVector(acl_pad); +} + +} // namespace acl +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/contrib/acl/codegen_acl.h b/src/relay/backend/contrib/acl/codegen_acl.h new file mode 100644 index 0000000000000..23efb09521b2a --- /dev/null +++ b/src/relay/backend/contrib/acl/codegen_acl.h @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/acl/codegen_acl.h + * \brief The Relay -> ACL JSON schema compiler. + */ + +#ifndef TVM_RELAY_BACKEND_CONTRIB_ACL_CODEGEN_ACL_H_ +#define TVM_RELAY_BACKEND_CONTRIB_ACL_CODEGEN_ACL_H_ + +#include +#include +#include + +#include +#include +#include +#include + +#include "acl_api.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace acl { + +/*! + * \brief Generates an ACLModule from a relay expression. This "compilation" + * does not require ACL since the actual conversion using ACL APIs is + * deferred until creation of the runtime. This step simply serializes the + * relay program into a JSON string. + */ +class CodegenACL : public MixedModeVisitor { + public: + CodegenACL() = default; + void VisitExpr_(const CallNode* node) final; + void VisitLeaf(const Expr& expr) final; + + /*! + * \brief Create a runtime module for ACL. + * + * This consists of a series of "serialized functions" which each represent a + * subgraph to be computed by ACL and will each be executed independently from + * one another. Each function consists of serialized JSON describing the subgraph + * and serialized constant tensors. + * + * \note The ACL runtime module only currently supports a single operator per + * subgraph currently. + * + * \param ref The ext_func Relay expression/module to be executed using extern ops. + * \return A runtime module. + */ + runtime::Module CreateRuntimeModule(const ObjectRef& ref); + + /*! + * \brief Create a JSON representation of a subgraph. + * + * \param func The function to be represented. + * \return A JSON representation of the function. + */ + JSONSubGraph CreateJSONSubgraph(const Function& func); + + private: + /*! + * \brief Serialize a single subgraph which can be saved to disk. + * + * A subgraph is serialized so that the output is as follows. + * - Serialized JSON. + * - Number of constant tensors. + * - Serialized constant tensors. + * + * \param ref Reference to the function to be serialized. + * \param serialized_functions A vector of serialized functions to add to. + */ + void SerializeFunction(const ObjectRef& ref, + std::vector>* serialized_functions); + + /*! + * \brief Pre-process a module containing functions ready for ACL codegen. + * + * For now we enforce OHWI kernel layout and fold the transforms away. + * + * \param mod The module to be pre-processed. + * \return The processed module. + */ + IRModule PreProcessModule(const IRModule& mod); + + /*! + * \brief Create a JSON representation of an operator. + * + * \param call The call to be represented. + * \return A JSON representation of a specific operator. + */ + JSONOp MakeConvolutionOp(const Call& call); + static JSONOp MakeMaxPool2DOp(const Call& call); + static JSONOp MakeReshapeOp(const Call& call); + + /*! 
+ * \brief Make a JSON representation of a (constant)tensor. + * + * \param expr Expression of a tensor to be represented. + * \return A JSON representation of a tensor. + */ + static JSONTensor MakeJSONTensor(const Expr& expr); + JSONTensor MakeJSONConstTensor(const Expr& expr); + + /*! + * \brief Check whether CallNode is a composite function and has the same + * op_name. + * + * \param call The current call node. + * \param op_name The expected name of the call node to check. + * \return True if the call node is composite and has the same name as + * op_name, False otherwise. + */ + bool IsAclFunc(const CallNode* call, const std::string& op_name) const; + + /*! + * \brief Get composite expression from call node. + * + * \param call The call node to get expression from. + * \return Expression for composite function. + */ + static Expr GetCompositeExpr(const Call& call); + + /*! + * \brief Convert a relay array to std::vector. + * + * \param array A relay array to be converted. + * \return std::vector. + */ + static std::vector ToVector(const Array& array); + + /*! + * \brief Create a padding vector compatible with ACL. + * + * Currently TVM has many ways to pad a an operator, so each method is taken care of here. + * + * \param pad Padding array. + * \return ACL compatible padding vector. + */ + static std::vector GetPadVector(const Array>& pad); + static std::vector GetPadVector(const Array& pad); + + /*! \brief A vector of constants to be serialized after the JSON representation is constructed. */ + std::vector constants_; + /*! \brief A look-up table from Expr to JSONOp. */ + std::map layer_table_; +}; + +/*! + * \brief The external ACL compiler/codegen tool. It takes a Relay + * expression/module and compiles it into a runtime module. + */ +runtime::Module ACLCompiler(const ObjectRef& ref) { + CodegenACL acl_codegen; + return acl_codegen.CreateRuntimeModule(ref); +} + +TVM_REGISTER_GLOBAL("relay.ext.acl").set_body_typed(ACLCompiler); + +/*! + * \brief Check whether ACL graph runtime is used. + * \return True if ACL graph runtime is enabled, False if not. + */ +inline constexpr bool IsACLRuntimeEnabled() { +#if TVM_GRAPH_RUNTIME_ACL + return true; +#else + return false; +#endif +} + +TVM_REGISTER_GLOBAL("relay.op.is_acl_runtime_enabled").set_body_typed(IsACLRuntimeEnabled); + +} // namespace acl +} // namespace contrib +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_BACKEND_CONTRIB_ACL_CODEGEN_ACL_H_ diff --git a/src/runtime/contrib/acl/acl_allocator.cc b/src/runtime/contrib/acl/acl_allocator.cc new file mode 100644 index 0000000000000..b72ec9552130c --- /dev/null +++ b/src/runtime/contrib/acl/acl_allocator.cc @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file src/runtime/contrib/acl/acl_allocator.cc + * \brief ACL Allocator implementation that requests memory from TVM. + */ + +#include "acl_allocator.h" + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +void* ACLAllocator::allocate(size_t size, size_t alignment) { + CHECK_GT(size, 0) << "Cannot allocate size less than or equal to zero"; + return this->device_api_->AllocWorkspace(this->ctx_, size, {}); +} + +void ACLAllocator::free(void* ptr) { this->device_api_->FreeWorkspace(this->ctx_, ptr); } + +std::unique_ptr ACLAllocator::make_region(size_t size, size_t alignment) { + return acl::support::cpp14::make_unique(size, alignment); +} + +ACLMemoryRegion::ACLMemoryRegion(size_t size, size_t alignment) : IMemoryRegion(size) { + CHECK_GT(size, 0) << "Cannot allocate size less than or equal to zero"; + this->ptr_ = this->device_api_->AllocDataSpace(this->ctx_, size, alignment, {}); +} + +ACLMemoryRegion::ACLMemoryRegion(void* ptr, size_t size) + : IMemoryRegion(size), is_subregion_(true) { + if (size != 0) { + this->ptr_ = ptr; + } +} + +ACLMemoryRegion::~ACLMemoryRegion() { + if (!is_subregion_) { + this->device_api_->FreeDataSpace(this->ctx_, this->ptr_); + } +} + +std::unique_ptr ACLMemoryRegion::extract_subregion(size_t offset, size_t size) { + if (this->ptr_ != nullptr && (offset < _size) && (_size - offset >= size)) { + return acl::support::cpp14::make_unique( + static_cast(this->ptr_) + offset, size); + } else { + return nullptr; + } +} + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_allocator.h b/src/runtime/contrib/acl/acl_allocator.h new file mode 100644 index 0000000000000..d608645947891 --- /dev/null +++ b/src/runtime/contrib/acl/acl_allocator.h @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/acl/acl_allocator.h + * \brief ACL Allocator implementation that requests memory from TVM. + */ + +#ifndef TVM_RUNTIME_CONTRIB_ACL_ACL_ALLOCATOR_H_ +#define TVM_RUNTIME_CONTRIB_ACL_ACL_ALLOCATOR_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +namespace acl = arm_compute; + +/*! + * \brief Override ACL memory allocator and replace with TVM workspace based allocation. + */ +class ACLAllocator : public arm_compute::IAllocator { + public: + ACLAllocator() = default; + + /*! + * \brief Allocate bytes to ACL runtime. + * + * Specific implementation requests memory from TVM using their device api. + * + * \param size Size to allocate. + * \param alignment Alignment that the returned pointer should comply with. + * \return A pointer to the allocated memory. 
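+   * \note The requested alignment is currently not forwarded to TVM's workspace
+   * allocator.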
+ */ + void* allocate(size_t size, size_t alignment) override; + + /*! + * \brief Free memory from ACL runtime. + * + * \param ptr Pointer to workspace to free. + */ + void free(void* ptr) override; + + /*! + * \brief Create self-managed memory region. + * + * \param size Size of the memory region. + * \param alignment Alignment of the memory region. + * \return The memory region object. + */ + std::unique_ptr make_region(size_t size, size_t alignment) override; + + private: + /*! \brief Always allocate data in the context of the current CPU. */ + const TVMContext ctx_{kDLCPU, 0}; + /*! \brief Device API which allows requests for memory from TVM. */ + runtime::DeviceAPI* device_api_ = runtime::DeviceAPI::Get(ctx_); +}; + +/*! + * \brief Memory region that can request TVM memory for ACL to use. + */ +class ACLMemoryRegion : public arm_compute::IMemoryRegion { + public: + ACLMemoryRegion(size_t size, size_t alignment); + ACLMemoryRegion(void* ptr, size_t size); + + ~ACLMemoryRegion() override; + + /*! \brief Prevent instances of this class from being copied (As this class contains + * pointers). */ + ACLMemoryRegion(const ACLMemoryRegion&) = delete; + /*! \brief Default move constructor. */ + ACLMemoryRegion(ACLMemoryRegion&&) = default; + /*! \brief Prevent instances of this class from being copied (As this class + * contains pointers) */ + ACLMemoryRegion& operator=(const ACLMemoryRegion&) = delete; + /*! Default move assignment operator. */ + ACLMemoryRegion& operator=(ACLMemoryRegion&&) = default; + + void* buffer() override { return this->ptr_; } + + const void* buffer() const override { return this->ptr_; } + + /*! + * \brief Extract a sub-region from the memory. + * + * \warning Ownership is maintained by the parent memory, + * while a wrapped raw memory region is returned by this function. + * Thus parent memory should not be released before this. + * + * \param offset Offset to the region. + * \param size Size of the region. + * \return A wrapped memory sub-region with no ownership of the + * underlying memory. + */ + std::unique_ptr extract_subregion(size_t offset, size_t size) override; + + private: + /*! \brief Points to a region of memory allocated by TVM. */ + void* ptr_; + /*! \brief A subregion doesn't manage TVM memory so we don't need to free it. */ + bool is_subregion_ = false; + /*! \brief Always allocate data in the context of the current CPU. */ + const TVMContext ctx_{kDLCPU, 0}; + /*! \brief Device API which allows requests for memory from TVM. */ + runtime::DeviceAPI* device_api_ = runtime::DeviceAPI::Get(ctx_); +}; + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_CONTRIB_ACL_ACL_ALLOCATOR_H_ diff --git a/src/runtime/contrib/acl/acl_kernel.cc b/src/runtime/contrib/acl/acl_kernel.cc new file mode 100644 index 0000000000000..a87b1b525e2e5 --- /dev/null +++ b/src/runtime/contrib/acl/acl_kernel.cc @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/acl/acl_kernel.cc + * \brief TVM compatible wrappers for ACL kernels. + */ + +#include "acl_kernel.h" + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +CachedLayer::CachedLayer(const api::JSONSubGraph& function, const std::vector& constants, + ACLAllocator* allocator, + const std::shared_ptr& mm) + : constants_(constants), allocator_(allocator) { + api::JSONOp op = function.op; + // Make tensors + int const_tensor_idx = 0; + for (const auto& it : op.inputs) { + if (it.type == "const") { + this->function_.const_inputs.push_back(MakeTensor(it, constants[const_tensor_idx++]->data)); + } else if (it.type == "var") { + this->function_.inputs.push_back(MakeTensor(it)); + } else { + LOG(FATAL) << "Unsupported tensor type"; + } + } + for (const auto& it : op.outputs) { + this->function_.outputs.push_back(MakeTensor(it)); + } + // Create layer + if (op.name == "conv2d") { + CreateConvolution2DLayer(&this->function_, function.op, mm); + this->is_mm_ = true; + } else if (op.name == "max_pool") { + CreateMaxPoolLayer(&this->function_, function.op); + } else if (op.name == "reshape") { + CreateReshapeLayer(&this->function_, function.op); + } else { + LOG(FATAL) << "Operator not yet supported"; + } + // Prepare function + this->function_.function->prepare(); +} + +bool CachedLayer::Inference(const std::vector& inputs, + const std::vector& outputs) { + for (size_t i = 0; i < inputs.size(); i++) { + CheckACLError(function_.inputs[i].allocator()->import_memory(inputs[i]->data)); + } + for (size_t i = 0; i < outputs.size(); i++) { + CheckACLError(function_.outputs[i].allocator()->import_memory(outputs[i]->data)); + } + + this->function_.function->run(); + return true; +} + +size_t CachedLayer::GetNumInputs() const { return this->function_.inputs.size(); } + +void CachedLayer::CreateConvolution2DLayer(CacheItems* cache, const api::JSONOp& params, + const std::shared_ptr& mm) { + auto padding = dmlc::get>(params.attrs.at("padding")); + auto strides = dmlc::get>(params.attrs.at("strides")); + auto groups = dmlc::get(params.attrs.at("groups")); + + CHECK(groups == 1) << "ACL NEON Convolution only supports group size of 1"; + + acl::PadStrideInfo pad_stride_info = + acl::PadStrideInfo(strides[0], strides[1], padding[0], padding[1], padding[2], padding[3], + acl::DimensionRoundingType::FLOOR); + acl::ActivationLayerInfo act_info = acl::ActivationLayerInfo(); + if (params.attrs.find("activation_type") != params.attrs.end()) { + auto activation_function = dmlc::get(params.attrs.at("activation_type")); + + if (activation_function == "relu") { + act_info = acl::ActivationLayerInfo(acl::ActivationLayerInfo::ActivationFunction::RELU); + } else { + LOG(FATAL) << "Unsupported activation function"; + } + } + + auto function = std::make_shared(mm); + function->configure(&cache->inputs[0], &cache->const_inputs[0], + cache->const_inputs.size() > 1 ? 
&cache->const_inputs[1] : nullptr, + &cache->outputs[0], pad_stride_info, acl::WeightsInfo(), acl::Size2D(1U, 1U), + act_info); + + cache->function = function; +} + +void CachedLayer::CreateMaxPoolLayer(CacheItems* cache, const api::JSONOp& params) { + auto padding = dmlc::get>(params.attrs.at("padding")); + auto strides = dmlc::get>(params.attrs.at("strides")); + auto pool_size = dmlc::get>(params.attrs.at("pool_size")); + auto pooling_type = dmlc::get(params.attrs.at("pooling_type")); + + acl::PoolingType pool_type; + if (pooling_type == "max") { + pool_type = acl::PoolingType::MAX; + } else { + LOG(FATAL) << "Pooling type not supported"; + } + + acl::PadStrideInfo pad_stride_info = + acl::PadStrideInfo(strides[0], strides[1], padding[0], padding[1], padding[2], padding[3], + acl::DimensionRoundingType::FLOOR); + acl::PoolingLayerInfo pool_info = acl::PoolingLayerInfo( + pool_type, acl::Size2D(pool_size[0], pool_size[1]), acl::DataLayout::NHWC, pad_stride_info); + + auto function = std::make_shared(); + function->configure(&cache->inputs[0], &cache->outputs[0], pool_info); + + cache->function = function; +} + +void CachedLayer::CreateReshapeLayer(CacheItems* cache, const api::JSONOp& params) { + auto function = std::make_shared(); + function->configure(&cache->inputs[0], &cache->outputs[0]); + + cache->function = function; +} + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_kernel.h b/src/runtime/contrib/acl/acl_kernel.h new file mode 100644 index 0000000000000..8ab8eaf229109 --- /dev/null +++ b/src/runtime/contrib/acl/acl_kernel.h @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/acl/acl_kernel.h + * \brief Use ACL library kernels, we create an interface to these. + */ + +#ifndef TVM_RUNTIME_CONTRIB_ACL_ACL_KERNEL_H_ +#define TVM_RUNTIME_CONTRIB_ACL_ACL_KERNEL_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../../../relay/backend/contrib/acl/acl_api.h" +#include "acl_allocator.h" +#include "acl_utils.h" + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +namespace api = relay::contrib::acl; +namespace acl = arm_compute; + +/*! + * \brief ACL objects we cache in order to avoid needing to construct + * a new layer each time. + */ +struct CacheItems { + std::shared_ptr function; + std::vector inputs; + std::vector const_inputs; + std::vector outputs; +}; + +/*! + * \brief A cached ACL layer containing a single ACL function. + */ +class CachedLayer { + public: + /*! + * \brief Create an ACL layer from a JSON representation. 
Also prepare + * the layer for execution - this will perform actions such as pre- + * transposing of weights. + * + * \note The naming suggests a subgraph directly maps to a layer. + * In general this is not true, but since ACL only expects subgraphs + * consisting of a single op it is. + * + * \param function A JSON representation of a subgraph. + * \param constants The constants used in the subgraph. + * \param allocator ACL can request memory from TVM. + */ + CachedLayer(const api::JSONSubGraph& function, const std::vector& constants, + ACLAllocator* allocator, const std::shared_ptr& mm); + + /*! + * \brief Run inference on the ACL layer. + * + * \param inputs The inputs for the layer. + * \param outputs The outputs for the layer. + * \return True if success, False if not successful. + */ + bool Inference(const std::vector& inputs, const std::vector& outputs); + + /*! + * \brief Get the number of inputs the layer takes. + * + * \return Number of inputs. + */ + size_t GetNumInputs() const; + + /*! + * \brief Check if the layer requires working memory to be allocated. + * + * \return True if it does, False if not. + */ + bool IsMemoryManaged() const { return this->is_mm_; } + + private: + /*! \brief Constant tensors used in the layer. */ + std::vector constants_; + /*! \brief Cache ACL function and tensors for execution. */ + CacheItems function_; + /*! \brief ACL Allocator to request auxiliary memory from TVM. */ + ACLAllocator* allocator_; + /*! \brief Check if the function requires working memory to be allocated. */ + bool is_mm_ = false; + + /*! \brief Create individual ACL layer. */ + static void CreateConvolution2DLayer(CacheItems* cache, const api::JSONOp& params, + const std::shared_ptr& mm); + static void CreateMaxPoolLayer(CacheItems* cache, const api::JSONOp& params); + static void CreateReshapeLayer(CacheItems* cache, const api::JSONOp& params); +}; + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_CONTRIB_ACL_ACL_KERNEL_H_ diff --git a/src/runtime/contrib/acl/acl_runtime.cc b/src/runtime/contrib/acl/acl_runtime.cc new file mode 100644 index 0000000000000..1c372fe2c7e01 --- /dev/null +++ b/src/runtime/contrib/acl/acl_runtime.cc @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include +#include + +#include + +#include "../../../relay/backend/contrib/acl/acl_api.h" +#include "../../file_util.h" + +#ifdef TVM_GRAPH_RUNTIME_ACL +#include +#include +#include + +#include "acl_allocator.h" +#include "acl_kernel.h" +#endif + +namespace tvm { +namespace runtime { + +namespace api = relay::contrib::acl; + +class ACLModule : public ModuleNode { + public: + /*! + * \brief The ACL runtime module. 
Deserialize the provided functions + * on creation and store in the layer cache. + * + * \param serialized_graphs A vector of (external symbol, serialized JSON subgraph) pairs. + */ + explicit ACLModule(const std::vector>& serialized_functions) { +#ifdef TVM_GRAPH_RUNTIME_ACL + auto lifetime_mgr = std::make_shared(); + auto pool_mgr = std::make_shared(); + auto mm = std::make_shared(lifetime_mgr, pool_mgr); + int num_pools = 0; +#endif + + for (const auto& it : serialized_functions) { + std::string serialized_function = it.second; + auto ds = api::DeserializeSubgraph(&serialized_function); + this->deserialized_functions_.emplace_back(it.first, ds); + +#ifdef TVM_GRAPH_RUNTIME_ACL + this->subgraph_cache_[it.first] = + std::make_shared(ds.first, ds.second, &this->allocator_, mm); + if (this->subgraph_cache_[it.first]->IsMemoryManaged()) num_pools++; +#endif + } +#ifdef TVM_GRAPH_RUNTIME_ACL + // Allocate working memory for layers. + if (num_pools > 0) mm->populate(this->allocator_, num_pools); +#endif + } + + /*! + * \brief Get a PackedFunc from the ACL module. + * + * \param name The name of the function. + * \param sptr_to_self The ObjectPtr that points to this module node. + * \return The function pointer when it is found, otherwise, PackedFunc(nullptr). + */ + PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) final { +#ifdef TVM_GRAPH_RUNTIME_ACL + if (this->subgraph_cache_.find(name) != this->subgraph_cache_.end()) { + return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) { + *rv = tvm::runtime::ACLModule::Inference(args, this->subgraph_cache_[name].get()); + }); + } +#endif + return PackedFunc(nullptr); + } + + /*! + * \brief The type key of the module. + * + * \return module type key. + */ + const char* type_key() const override { return "acl"; } + + /*! + * \brief Unpack inputs and outputs and run inference on a given layer. + * + * \param args Access inputs and outputs. + * \param function The layer to execute inference on. + * \return Status of inference. + */ +#ifdef TVM_GRAPH_RUNTIME_ACL + static bool Inference(tvm::runtime::TVMArgs args, contrib::acl::CachedLayer* function) { + // Unpack parameters + int argc = 0; + std::vector inputs; + for (size_t i = 0; i < function->GetNumInputs(); i++) { + inputs.push_back(args[argc++]); + } + std::vector outputs; + for (; argc < args.size(); argc++) { + outputs.push_back(args[argc]); + } + return function->Inference(inputs, outputs); + } +#endif + + /*! + * \brief Save a compiled network to a binary stream, which can then be + * serialized to disk. + * + * \param stream The stream to save the binary. + */ + void SaveToBinary(dmlc::Stream* stream) final { + stream->Write(this->deserialized_functions_.size()); + for (const auto& it : this->deserialized_functions_) { + stream->Write(it.first); + std::pair> subgraph_pair = it.second; + std::string serialized_function = + api::SerializeSubgraph(subgraph_pair.first, subgraph_pair.second); + stream->Write(serialized_function); + } + } + + /*! + * \brief Load a compiled network from stream. + * + * \param strm The binary stream to load. + * \return The created ACL module. 
+ */ + static Module LoadFromBinary(void* strm) { + auto stream = static_cast(strm); + size_t func_count; + stream->Read(&func_count); + std::vector> serialized_functions; + for (unsigned int i = 0; i < func_count; i++) { + std::string ext_symbol; + std::string serialized_function; + stream->Read(&ext_symbol); + stream->Read(&serialized_function); + serialized_functions.emplace_back(std::make_pair(ext_symbol, serialized_function)); + } + auto n = make_object(serialized_functions); + return Module(n); + } + + /*! + * \brief Save a module to a specified path. + * + * \param path Where to save the serialized module. + * \param format The format of the file. + */ + void SaveToFile(const std::string& path, const std::string& format) override { + std::string data; + dmlc::MemoryStringStream writer(&data); + dmlc::SeekStream* strm = &writer; + SaveToBinary(strm); + SaveBinaryToFile(path, data); + } + + /*! + * \brief Create a module from a file. + * + * \param path The path of the file containing the serialized module. + * \return The created ACL module. + */ + static Module LoadFromFile(const std::string& path) { + std::string data; + LoadBinaryFromFile(path, &data); + dmlc::MemoryStringStream reader(&data); + return LoadFromBinary(&reader); + } + + /*! + * \brief Get the JSON generated by codegen. + * + * \param format the format to return (only JSON for the time being) + * \return A string of JSON. + */ + std::string GetSource(const std::string& format) override { + std::ostringstream os; + dmlc::JSONWriter writer(&os); + writer.BeginObject(); + for (const auto& it : deserialized_functions_) { + writer.WriteObjectKeyValue(it.first, it.second.first); + } + writer.EndObject(); + return os.str(); + } + + private: + /* \brief A vector of (external symbol, serialized JSON subgraph) pairs. */ + std::vector>>> + deserialized_functions_; + +#ifdef TVM_GRAPH_RUNTIME_ACL + /* \brief A map between ext_symbols (function names) and an ACL subgraph. + * \note Currently only a single op per subgraph is supported. Hence mapping to + * cached layer.*/ + std::map> subgraph_cache_; + /*! \brief Allow ACL functions to request auxiliary memory from TVM. */ + contrib::acl::ACLAllocator allocator_; +#endif +}; + +TVM_REGISTER_GLOBAL("runtime.module.loadfile_acl").set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = ACLModule::LoadFromFile(args[0]); +}); + +TVM_REGISTER_GLOBAL("runtime.module.loadbinary_acl").set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = ACLModule::LoadFromBinary(args[0]); +}); + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_utils.cc b/src/runtime/contrib/acl/acl_utils.cc new file mode 100644 index 0000000000000..6e29cc384d404 --- /dev/null +++ b/src/runtime/contrib/acl/acl_utils.cc @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/acl/acl_utils.cc + * \brief Utils and common functions for the interface. + */ + +#include "acl_utils.h" + +#include +#include + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +void CheckACLError(arm_compute::Status status) { + CHECK(status.error_code() == arm_compute::ErrorCode::OK) << "ACL: " << status.error_description(); +} + +acl::Tensor MakeTensor(const api::JSONTensor& tensor_rep, void* data) { + acl::Tensor tensor; + acl::TensorInfo info = MakeTensorInfo(tensor_rep); + tensor.allocator()->init(info); + if (data != nullptr) { + CheckACLError(tensor.allocator()->import_memory(data)); + } + return tensor; +} + +acl::TensorInfo MakeTensorInfo(const api::JSONTensor& tensor_rep) { + return acl::TensorInfo(MakeTensorShape(tensor_rep.shape), 1, acl::DataType::F32, + acl::DataLayout::NHWC); +} + +arm_compute::TensorShape MakeTensorShape(const std::vector& shape) { + arm_compute::TensorShape acl_shape; + for (unsigned int i = shape.size(); i > 0; --i) { + acl_shape.set(shape.size() - i, shape[i - 1]); + } + return acl_shape; +} + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_utils.h b/src/runtime/contrib/acl/acl_utils.h new file mode 100644 index 0000000000000..111121d48308e --- /dev/null +++ b/src/runtime/contrib/acl/acl_utils.h @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/acl/acl_utils.h + * \brief Utils and common functions for the interface. + */ + +#ifndef TVM_RUNTIME_CONTRIB_ACL_ACL_UTILS_H_ +#define TVM_RUNTIME_CONTRIB_ACL_ACL_UTILS_H_ + +#include +#include + +#include + +#include "../../../relay/backend/contrib/acl/acl_api.h" + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +namespace api = relay::contrib::acl; +namespace acl = arm_compute; + +/*! + * \brief Check if there are any errors from acl and forward them to TVM. + * + * \param status status of called function. + * + * Status values: + * - 0 => OK + * - 1 => RUNTIME_ERROR + * - 2 => UNSUPPORTED_EXTENSION_USE + */ +void CheckACLError(acl::Status status); + +/*! + * \brief Make an acl tensor from JSON tensor representation. + * + * \param tensor_rep A JSON tensor representation. + * \param data (optional) Initialize the tensor with memory. + * \return arm_compute::Tensor. + */ +acl::Tensor MakeTensor(const api::JSONTensor& tensor_rep, void* data = nullptr); + +/*! + * \brief Make an acl tensor info object from JSON tensor + * representation. + * + * \param tensor_rep A JSON tensor representation. 
+ * \return arm_compute::TensorInfo. + */ +acl::TensorInfo MakeTensorInfo(const api::JSONTensor& tensor_rep); + +/*! + * \brief Convert vector object to acl TensorShape. + * \note This requires reversing the given vector. + * + * \param shape The shape of the tensor as a vector. + * \return acl TensorShape. + */ +acl::TensorShape MakeTensorShape(const std::vector& shape); + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_CONTRIB_ACL_ACL_UTILS_H_ diff --git a/tests/python/contrib/test_acl/__init__.py b/tests/python/contrib/test_acl/__init__.py new file mode 100644 index 0000000000000..a8671172febde --- /dev/null +++ b/tests/python/contrib/test_acl/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Infrastructure and tests for ACL""" diff --git a/tests/python/contrib/test_acl/infrastructure.py b/tests/python/contrib/test_acl/infrastructure.py new file mode 100644 index 0000000000000..04c5d2784c28a --- /dev/null +++ b/tests/python/contrib/test_acl/infrastructure.py @@ -0,0 +1,162 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from itertools import zip_longest, combinations +import json + +import tvm +from tvm import relay +from tvm import rpc +from tvm.contrib import graph_runtime +from tvm.relay.op.contrib import acl +from tvm.contrib import util + + +class Device: + """Adjust the following settings to connect to and use a remote device for tests.""" + use_remote = False + target = "llvm -target=aarch64-linux-gnu -mattr=+neon" + # Enable cross compilation when connecting a remote device from a non-arm platform. + cross_compile = None + # cross_compile = "aarch64-linux-gnu-g++" + + def __init__(self): + """Keep remote device for lifetime of object.""" + self.device = self._get_remote() + + @classmethod + def _get_remote(cls): + """Get a remote (or local) device to use for testing.""" + if cls.use_remote: + # Here you may adjust settings to run the ACL unit tests via a remote + # device using the RPC mechanism. 
Use this in the case you want to compile + # an ACL module on a different machine to what you run the module on i.e. + # x86 -> AArch64. + # + # Use the following to connect directly to a remote device: + # device = rpc.connect( + # hostname="0.0.0.0", + # port=9090) + # + # Or connect via a tracker: + # device = tvm.autotvm.measure.request_remote( + # host="0.0.0.0", + # port=9090, + # device_key="device_key", + # timeout=1000) + # + # return device + raise NotImplementedError( + "Please adjust these settings to connect to your remote device.") + else: + device = rpc.LocalSession() + return device + + +def skip_runtime_test(): + """Skip test if it requires the runtime and it's not present.""" + # ACL codegen not present. + if not tvm.get_global_func("relay.ext.acl", True): + print("Skip because ACL codegen is not available.") + return True + + # Remote device is in use or ACL runtime not present + if not Device.use_remote and not acl.is_acl_runtime_present(): + print("Skip because runtime isn't present or a remote device isn't being used.") + return True + + +def skip_codegen_test(): + """Skip test if it requires the ACL codegen and it's not present.""" + if not tvm.get_global_func("relay.ext.acl", True): + print("Skip because ACL codegen is not available.") + return True + + +def build_module(mod, target, params=None, enable_acl=True): + """Build module with option to build for ACL.""" + if isinstance(mod, tvm.relay.expr.Call): + mod = tvm.IRModule.from_expr(mod) + with tvm.transform.PassContext(opt_level=3): + if enable_acl: + mod = acl.partition_for_acl(mod, params) + return relay.build(mod, target=target, params=params) + + +def build_and_run(mod, inputs, outputs, params, device, enable_acl=True, no_runs=1): + """Build and run the relay module.""" + graph, lib, params = build_module(mod, device.target, params, enable_acl) + lib = update_lib(lib, device.device, device.cross_compile) + gen_module = graph_runtime.create(graph, lib, ctx=device.device.cpu(0)) + gen_module.set_input(**inputs) + gen_module.set_input(**params) + for _ in range(no_runs): + gen_module.run() + out = [gen_module.get_output(i) for i in range(outputs)] + return out + + +def update_lib(lib, device, cross_compile): + """Export the library to the remote/local device.""" + lib_name = "mod.so" + temp = util.tempdir() + lib_path = temp.relpath(lib_name) + if cross_compile: + lib.export_library(lib_path, cc=cross_compile) + else: + lib.export_library(lib_path) + device.upload(lib_path) + lib = device.load_module(lib_name) + return lib + + +def verify(answers, atol, rtol): + """Compare the array of answers. 
Each entry is a list of outputs.""" + if len(answers) < 2: + raise RuntimeError( + f"No results to compare: expected at least two, found {len(answers)}") + for answer in zip_longest(*answers): + for outs in combinations(answer, 2): + tvm.testing.assert_allclose( + outs[0].asnumpy(), outs[1].asnumpy(), rtol=rtol, atol=atol) + + +def extract_acl_modules(module): + """Get the ACL module(s) from llvm module.""" + return list(filter(lambda mod: mod.type_key == "acl", + module.imported_modules)) + + +def verify_codegen(module, known_good_codegen, num_acl_modules, + target="llvm -target=aarch64-linux-gnu -mattr=+neon"): + """Check acl codegen against a known good output.""" + _, module, _ = build_module(module, target) + acl_modules = extract_acl_modules(module) + + assert len(acl_modules) == num_acl_modules, \ + f"The number of ACL modules produced ({len(acl_modules)}) does not " \ + f"match the expected value ({num_acl_modules})." + + for mod in acl_modules: + source = mod.get_source() + source_json = json.loads(source) + func_name = list(source_json.keys())[0] + codegen = source_json[func_name]["node"] + + assert codegen == known_good_codegen, \ + f"The JSON produced by codegen does not match the expected result. \n" \ + f"Actual={json.dumps(codegen, sort_keys=True, indent=2)} \n" \ + f"Expected={json.dumps(known_good_codegen, sort_keys=True, indent=2)}" diff --git a/tests/python/contrib/test_acl/test_conv2d.py b/tests/python/contrib/test_acl/test_conv2d.py new file mode 100644 index 0000000000000..a2724315c4e8e --- /dev/null +++ b/tests/python/contrib/test_acl/test_conv2d.py @@ -0,0 +1,202 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
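The Device helper above defaults to a local RPC session. To run these tests against a real AArch64 board instead, the commented-out settings in _get_remote are meant to be filled in; a minimal sketch of such a configuration follows (an illustration only, not part of this patch: the board address, port and cross-compiler prefix are placeholders).

# Hypothetical adjustment of tests/python/contrib/test_acl/infrastructure.py
# for remote execution; address, port and toolchain prefix are examples only.
from tvm import rpc

class Device:
    use_remote = True
    target = "llvm -target=aarch64-linux-gnu -mattr=+neon"
    # Cross-compile the exported library when the host is not AArch64.
    cross_compile = "aarch64-linux-gnu-g++"

    def __init__(self):
        self.device = self._get_remote()

    @classmethod
    def _get_remote(cls):
        # Connect directly to an RPC server started on the board, e.g.
        # `python -m tvm.exec.rpc_server --host 0.0.0.0 --port 9090`.
        return rpc.connect("192.168.1.10", 9090)

With use_remote set, skip_runtime_test no longer requires the host build to include the ACL runtime, and update_lib uploads the cross-compiled mod.so to the board before loading it.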
+"""ACL Integration conv2d tests.""" + +import numpy as np + +import tvm +from tvm import relay + +from .infrastructure import skip_runtime_test, skip_codegen_test, build_and_run, \ + verify, verify_codegen +from .infrastructure import Device + + +def _get_model(shape, kernel_size, padding, strides, + dilation, groups, dtype, channels, + var_names, has_bias=False, has_activation=False, has_pad=False): + """Return a model and any parameters it may have""" + a = relay.var(next(var_names), shape=shape, dtype=dtype) + if has_pad: + p = ((0, 0), (padding[0], padding[0]), (padding[1], padding[1]), (0, 0)) + a = relay.nn.pad(a, pad_width=p) + padding = (0, 0, 0, 0) + else: + if len(padding) == 2: + padding = (padding[0], padding[1], padding[0], padding[1]) + shape = (shape[0], shape[1] + padding[0] * 2, + shape[2] + padding[1] * 2, shape[3]) + weight_shape = (kernel_size, kernel_size, shape[3] // groups, channels) + w = tvm.nd.array(np.random.uniform(-128, 127, weight_shape).astype(dtype)) + weights = relay.const(w, dtype) + out = relay.nn.conv2d( + a, + weights, + kernel_size=(kernel_size, kernel_size), + data_layout="NHWC", + kernel_layout="HWIO", + dilation=(1, 1), + strides=strides, + padding=padding, + groups=groups, + channels=channels + ) + params = {"w": w} + if has_bias: + b = tvm.nd.array(np.random.uniform(-128, 127, weight_shape[3]).astype(dtype)) + biasc = relay.const(b, dtype) + out = relay.nn.bias_add(out, biasc, axis=3) + params["b"] = b + if has_activation: + out = relay.nn.relu(out) + return out, params + + +def _get_expected_codegen(shape, kernel_size, padding, strides, + dilation, groups, dtype, channels, + has_bias=False, has_activation=False): + codegen = { + "name": "conv2d", + "inputs": [], + "outputs": [], + "attrs": { + "groups": ["Int", 1], + "num_inputs": ["Size_t", 2], + "num_outputs": ["Size_t", 1] + } + } + + if len(padding) == 2: + padding = (padding[0], padding[1], padding[0], padding[1]) + # Transpose padding to match ACL format + padding = (padding[1], padding[3], padding[0], padding[2]) + weight_shape = (channels, kernel_size, kernel_size, shape[3] // groups) + output_height = ((shape[1] - kernel_size + padding[2] + padding[3]) / strides[0]) + 1 + output_width = ((shape[2] - kernel_size + padding[0] + padding[1]) / strides[1]) + 1 + output_shape = (1, int(output_height), int(output_width), channels) + + codegen["attrs"]["padding"] = ["IntVector", list(padding)] + codegen["attrs"]["strides"] = ["IntVector", list(strides)] + if has_activation: + codegen["attrs"]["activation_type"] = ["String", "relu"] + + inputs = [{"type": "var", "shape": list(shape)}, + {"type": "const", "shape": list(weight_shape)}] + if has_bias: + inputs.append({"type": "const", "shape": [weight_shape[0]]}) + outputs = [{"type": "var", "shape": list(output_shape)}] + + codegen["inputs"] = inputs + codegen["outputs"] = outputs + codegen["attrs"]["num_inputs"] = ["Size_t", len(inputs)] + codegen["attrs"]["num_outputs"] = ["Size_t", len(outputs)] + + return codegen + + +def test_conv2d(): + if skip_runtime_test(): + return + + device = Device() + + shape = (1, 25, 25, 1) + dtype = "float32" + + inputs = { + "a": tvm.nd.array(np.random.uniform(-128, 127, shape).astype(dtype)), + } + + for kernel_size in [2, 3]: + outputs = [] + func, params = _get_model(shape, kernel_size, + (0, 0), (1, 1), 1, 1, + dtype, 1, iter(inputs)) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, + params, device, + enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + for pad_ksize in 
[((1, 1), 3), ((2, 2), 5), ((2, 1), 3)]: + outputs = [] + func, params = _get_model(shape, pad_ksize[1], pad_ksize[0], + (1, 1), 1, 1, dtype, 1, iter(inputs)) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, + params, device, + enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + for strides in [(1, 1), (2, 2)]: + outputs = [] + func, params = _get_model(shape, 2, (0, 0), strides, + 1, 1, dtype, 1, iter(inputs)) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, + params, device, + enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + # Test composite convolution: (has_pad, has_bias, has_activation). + for composite in [(False, True, False), (False, False, True), (False, True, True), + (True, False, False)]: + outputs = [] + func, params = _get_model(shape, 2, (1, 1), (1, 1), + 1, 1, dtype, 1, iter(inputs), + has_pad=composite[0], + has_bias=composite[1], + has_activation=composite[2]) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, + params, device, + enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + +def test_codegen_conv2d(): + if skip_codegen_test(): + return + + shape = (1, 25, 25, 1) + dtype = "float32" + inputs = {"a"} + + for pad_ksize in [((1, 1), 3), ((2, 1), 3)]: + args = (shape, pad_ksize[1], pad_ksize[0], (1, 1), 1, 1, dtype, 1) + func, params = _get_model(*args, var_names=iter(inputs)) + exp_codegen = _get_expected_codegen(*args) + verify_codegen(func, exp_codegen, 1) + + # Test composite convolution: (has_pad, has_bias, has_activation). + for composite in [(False, True, False), (False, False, True), (False, True, True), + (True, False, False)]: + args = (shape, 2, (1, 1), (1, 1), 1, 1, dtype, 1) + func, params = _get_model(*args, var_names=iter(inputs), + has_pad=composite[0], + has_bias=composite[1], + has_activation=composite[2]) + exp_codegen = _get_expected_codegen(*args, + has_bias=composite[1], + has_activation=composite[2], + ) + verify_codegen(func, exp_codegen, 1) + + +if __name__ == "__main__": + test_conv2d() + test_codegen_conv2d() diff --git a/tests/python/contrib/test_acl/test_network.py b/tests/python/contrib/test_acl/test_network.py new file mode 100644 index 0000000000000..e5afe905228f1 --- /dev/null +++ b/tests/python/contrib/test_acl/test_network.py @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
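To make the comparison performed by verify_codegen concrete, consider the first case exercised by test_codegen_conv2d above: shape (1, 25, 25, 1), kernel_size 3, padding (1, 1), strides (1, 1), groups 1, channels 1, no bias or activation. Tracing _get_expected_codegen by hand (an illustration derived from the helper's formulas, not captured test output) gives:

expected = {
    "name": "conv2d",
    "inputs": [
        {"type": "var", "shape": [1, 25, 25, 1]},    # NHWC input
        {"type": "const", "shape": [1, 3, 3, 1]},    # (channels, kH, kW, in_channels // groups)
    ],
    "outputs": [
        # ((25 - 3 + 1 + 1) / 1) + 1 = 25 for each spatial dimension
        {"type": "var", "shape": [1, 25, 25, 1]},
    ],
    "attrs": {
        "groups": ["Int", 1],
        "num_inputs": ["Size_t", 2],
        "num_outputs": ["Size_t", 1],
        "padding": ["IntVector", [1, 1, 1, 1]],      # 2-element padding expanded, then reordered for ACL
        "strides": ["IntVector", [1, 1]],
    },
}

verify_codegen parses the JSON returned by the module's get_source, picks out the node recorded for the single ACL function and asserts that it equals this dictionary.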
+"""ACL network tests.""" + +import numpy as np + +from tvm import relay + +from .infrastructure import skip_runtime_test, build_and_run, verify +from .infrastructure import Device + + +def _build_and_run_keras_network(mod, params, inputs, device): + """Helper function to build and run a network from the Keras frontend.""" + data = {} + for name, shape in inputs.items(): + data[name] = np.random.uniform(-128, 127, shape).astype("float32") + + outputs = [] + for acl in [False, True]: + outputs.append(build_and_run(mod, data, 1, params, + device, enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + +def test_vgg16(): + if skip_runtime_test(): + return + + device = Device() + + def get_model(): + from keras.applications import VGG16 + vgg16 = VGG16(include_top=True, weights='imagenet', + input_shape=(224, 224, 3), classes=1000) + inputs = {vgg16.input_names[0]: (1, 224, 224, 3)} + mod, params = relay.frontend.from_keras(vgg16, inputs, layout="NHWC") + return mod, params, inputs + + _build_and_run_keras_network(*get_model(), device=device) + + +def test_mobilenet(): + if skip_runtime_test(): + return + + device = Device() + + def get_model(): + from keras.applications import MobileNet + mobilenet = MobileNet(include_top=True, weights='imagenet', + input_shape=(224, 224, 3), classes=1000) + inputs = {mobilenet.input_names[0]: (1, 224, 224, 3)} + mod, params = relay.frontend.from_keras(mobilenet, inputs, layout="NHWC") + return mod, params, inputs + + _build_and_run_keras_network(*get_model(), device=device) + + +if __name__ == "__main__": + test_vgg16() + test_mobilenet() diff --git a/tests/python/contrib/test_acl/test_pooling.py b/tests/python/contrib/test_acl/test_pooling.py new file mode 100644 index 0000000000000..8fb1e93d6ac07 --- /dev/null +++ b/tests/python/contrib/test_acl/test_pooling.py @@ -0,0 +1,121 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""ACL Integration pooling tests.""" + +import numpy as np + +import tvm +from tvm import relay + +from .infrastructure import skip_runtime_test, skip_codegen_test, build_and_run, \ + verify, verify_codegen +from .infrastructure import Device + + +def _get_model(shape, typef, sizes, strides, padding, + ceil_mode, var_names): + """Return a model and any parameters it may have.""" + var = relay.var(next(var_names), shape=shape, dtype="float32") + pool = typef(var, pool_size=sizes, strides=strides, padding=padding, + ceil_mode=ceil_mode, layout="NHWC") + return pool + + +def _get_expected_codegen(shape, typef, sizes, strides, padding, + ceil_mode): + codegen = { + "name": "max_pool", + "inputs": [], + "outputs": [], + "attrs": { + "pooling_type": ["String", "max"] + } + } + + if len(padding) == 2: + padding = (padding[1], padding[1], padding[0], padding[0]) + # Transpose padding to match ACL format + padding = (padding[1], padding[3], padding[0], padding[2]) + output_height = ((shape[1] - sizes[0] + padding[2] + padding[3]) / strides[0]) + 1 + output_width = ((shape[2] - sizes[1] + padding[0] + padding[1]) / strides[1]) + 1 + output_shape = (1, int(output_height), int(output_width), shape[3]) + + if typef == relay.nn.max_pool2d: + pooling_type = "max" + else: + raise NotImplementedError(f"No conversion from {typef} to pooling_type string.") + + codegen["attrs"]["padding"] = ["IntVector", list(padding)] + codegen["attrs"]["strides"] = ["IntVector", list(strides)] + codegen["attrs"]["pool_size"] = ["IntVector", list(sizes)] + codegen["attrs"]["pooling_type"] = ["String", pooling_type] + + inputs = [{"type": "var", "shape": list(shape)}] + outputs = [{"type": "var", "shape": list(output_shape)}] + + codegen["inputs"] = inputs + codegen["outputs"] = outputs + codegen["attrs"]["num_inputs"] = ["Size_t", len(inputs)] + codegen["attrs"]["num_outputs"] = ["Size_t", len(outputs)] + + return codegen + + +def test_pooling(): + if skip_runtime_test(): + return + + device = Device() + + for size in [(2, 2), (3, 3)]: + for stride in [(2, 2)]: + shape = (1, size[0] + stride[0] * 5, + size[1] + stride[1] * 5, 16) + + inputs = { + "a": tvm.nd.array(np.random.uniform(-1, 1, shape).astype("float32")), + } + + outputs = [] + func = _get_model(shape, relay.nn.max_pool2d, size, + stride, (0, 0), True, iter(inputs)) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, None, device, + enable_acl=acl)) + verify(outputs, atol=0.001, rtol=0.001) + + +def test_codegen_pooling(): + if skip_codegen_test(): + return + + inputs = {"a"} + + for size in [(2, 2), (3, 3)]: + for stride in [(2, 2)]: + shape = (1, size[0] + stride[0] * 5, + size[1] + stride[1] * 5, 16) + args = (shape, relay.nn.max_pool2d, size, + stride, (0, 0), True) + func = _get_model(*args, iter(inputs)) + exp_codegen = _get_expected_codegen(*args) + verify_codegen(func, exp_codegen, 1) + + +if __name__ == "__main__": + test_pooling() + test_codegen_pooling() diff --git a/tests/python/contrib/test_acl/test_reshape.py b/tests/python/contrib/test_acl/test_reshape.py new file mode 100644 index 0000000000000..81192cdf992c8 --- /dev/null +++ b/tests/python/contrib/test_acl/test_reshape.py @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""ACL Integration reshape tests.""" + +import numpy as np + +import tvm +from tvm import relay + +from .infrastructure import skip_runtime_test, skip_codegen_test, build_and_run, \ + verify, verify_codegen +from .infrastructure import Device + + +def _get_model(input_shape, output_shape, var_names): + """Return a model and any parameters it may have.""" + a = relay.var(next(var_names), shape=input_shape, dtype="float32") + reshape = relay.reshape(a, output_shape) + return reshape + + +def _get_expected_codegen(input_shape, output_shape): + codegen = { + "name": "reshape", + "inputs": [], + "outputs": [], + "attrs": {} + } + + inputs = [{"type": "var", "shape": list(input_shape)}] + outputs = [{"type": "var", "shape": list(output_shape)}] + + codegen["inputs"] = inputs + codegen["outputs"] = outputs + codegen["attrs"]["num_inputs"] = ["Size_t", len(inputs)] + codegen["attrs"]["num_outputs"] = ["Size_t", len(outputs)] + + return codegen + + +def test_reshape(): + if skip_runtime_test(): + return + + device = Device() + + inputs = { + "a": tvm.nd.array( + np.random.uniform(-128, 127, (1, 1, 1, 1000)).astype("float32")) + } + + for shape in [(1, 1000), (10, 10, 10)]: + outputs = [] + func = _get_model(inputs["a"].shape, shape, iter(inputs)) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, None, device, + enable_acl=acl)) + verify(outputs, atol=1e-7, rtol=1e-7) + + +def test_codegen_reshape(): + if skip_codegen_test(): + return + + shape = (1, 1, 1, 1000) + inputs = {"a"} + + for new_shape in [(1, 1000), (10, 10, 10)]: + args = (shape, new_shape) + func = _get_model(*args, iter(inputs)) + exp_codegen = _get_expected_codegen(*args) + verify_codegen(func, exp_codegen, 1) + + +if __name__ == "__main__": + test_reshape() + test_codegen_reshape() diff --git a/tests/python/contrib/test_acl/test_runtime.py b/tests/python/contrib/test_acl/test_runtime.py new file mode 100644 index 0000000000000..7b332730e9538 --- /dev/null +++ b/tests/python/contrib/test_acl/test_runtime.py @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""ACL runtime tests.""" + +import numpy as np + +import tvm +from tvm import relay + +from .infrastructure import skip_runtime_test, build_and_run, verify +from .infrastructure import Device + + +def test_multiple_ops(): + """ + Test multiple operators destined for acl. + ACL will expect these ops as in 2 separate functions. + """ + if skip_runtime_test(): + return + + device = Device() + + def get_model(input_shape, var_names): + """Return a model and any parameters it may have.""" + a = relay.var(next(var_names), shape=input_shape, dtype="float32") + out = relay.reshape(a, (1, 1, 1000)) + out = relay.reshape(out, (1, 1000)) + return out + + inputs = { + "a": tvm.nd.array(np.random.uniform(0, 1, (1, 1, 1, 1000)).astype("float32")) + } + + outputs = [] + for acl in [False, True]: + func = get_model(inputs["a"].shape, iter(inputs)) + outputs.append(build_and_run(func, inputs, 1, None, device, + enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + +def test_multiple_runs(): + """ + Test that multiple runs of an operator work. + Note: the result isn't checked. + """ + if skip_runtime_test(): + return + + device = Device() + + def get_model(): + a = relay.var("a", shape=(1, 28, 28, 512), dtype="float32") + w = tvm.nd.array(np.ones((256, 1, 1, 512), dtype="float32")) + weights = relay.const(w, "float32") + conv = relay.nn.conv2d( + a, + weights, + kernel_size=(1, 1), + data_layout="NHWC", + kernel_layout="OHWI", + strides=(1, 1), + padding=(0, 0), + dilation=(1, 1) + ) + params = {"w": w} + return conv, params + + inputs = { + "a": tvm.nd.array(np.ones((1, 28, 28, 512), dtype="float32")), + } + + func, params = get_model() + build_and_run(func, inputs, 1, + params, device, + enable_acl=True, + no_runs=3) + + +if __name__ == "__main__": + test_multiple_ops() + test_multiple_runs()