diff --git a/CMakeLists.txt b/CMakeLists.txt index aaddebdfe3c57..bdb38e3fb8058 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,6 +70,8 @@ tvm_option(USE_CPP_RPC "Build CPP RPC" OFF) tvm_option(USE_TFLITE "Build with tflite support" OFF) tvm_option(USE_TENSORFLOW_PATH "TensorFlow root path when use TFLite" none) tvm_option(USE_COREML "Build with coreml support" OFF) +tvm_option(USE_ACL "Build with Arm Compute Library" OFF) +tvm_option(USE_ACL_GRAPH_RUNTIME "Build with Arm Compute Library graph runtime" OFF) if(USE_CPP_RPC AND UNIX) message(FATAL_ERROR "USE_CPP_RPC is only supported with WIN32. Use the Makefile for non-Windows.") @@ -327,6 +329,7 @@ include(cmake/modules/contrib/HybridDump.cmake) include(cmake/modules/contrib/TFLite.cmake) include(cmake/modules/contrib/TF_TVMDSOOP.cmake) include(cmake/modules/contrib/CoreML.cmake) +include(cmake/modules/contrib/ACL.cmake) include(CheckCXXCompilerFlag) if(NOT MSVC) diff --git a/cmake/config.cmake b/cmake/config.cmake index 1b196922ca059..e59690da2c04e 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -184,6 +184,18 @@ set(USE_SORT ON) # Whether use MKL-DNN (DNNL) codegen set(USE_DNNL_CODEGEN OFF) +# Whether to use ACL (Arm Compute Library) codegen +# We provide 2 separate flags since we cannot build the ACL runtime on x86. +# This is useful for cases where you want to cross-compile a relay graph +# on x86 then run on AArch. +# +# USE_ACL - Support for compiling a relay graph offloading supported +# operators to ACL. OFF/ON +# USE_ACL_GRAPH_RUNTIME - Run ACL annotated functions via the ACL +# runtime. OFF/ON/"path/to/ACL" +set(USE_ACL OFF) +set(USE_ACL_GRAPH_RUNTIME OFF) + # Build ANTLR parser for Relay text format # Possible values: # - ON: enable ANTLR by searching default locations (cmake find_program for antlr4 and /usr/local for jar) diff --git a/cmake/modules/contrib/ACL.cmake b/cmake/modules/contrib/ACL.cmake new file mode 100644 index 0000000000000..94db11d1fdf05 --- /dev/null +++ b/cmake/modules/contrib/ACL.cmake @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# We separate the codegen and runtime build since ACL can only be built +# for AArch. In the world where we take the cross compilation approach, +# which is common with arm devices, we need to be able to cross-compile +# a relay graph on x86 for AArch and then run the graph on AArch. +if(USE_ACL) + file(GLOB ACL_RELAY_CONTRIB_SRC src/relay/backend/contrib/acl/*.cc) + file(GLOB ACL_RUNTIME_MODULE src/runtime/contrib/acl/acl_runtime.cc) + list(APPEND COMPILER_SRCS ${ACL_RELAY_CONTRIB_SRC}) + list(APPEND COMPILER_SRCS ${ACL_RUNTIME_MODULE}) + message(STATUS "Build with ACL support...") +endif() + +if(USE_ACL_GRAPH_RUNTIME) + set(ACL_PATH ${CMAKE_CURRENT_SOURCE_DIR}/acl) + # Detect custom ACL path. 
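+ # If USE_ACL_GRAPH_RUNTIME holds a value other than ON, treat it as the root of a
+ # pre-built ACL tree; otherwise ACL is expected under <tvm source root>/acl.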
+ if (NOT USE_ACL_GRAPH_RUNTIME STREQUAL "ON") + set(ACL_PATH ${USE_ACL_GRAPH_RUNTIME}) + endif() + + file(GLOB ACL_CONTRIB_SRC src/runtime/contrib/acl/*) + file(GLOB ACL_API src/relay/backend/contrib/acl/acl_api.cc) + + set(ACL_INCLUDE_DIRS ${ACL_PATH}/include ${ACL_PATH}) + include_directories(${ACL_INCLUDE_DIRS}) + + find_library(EXTERN_ACL_COMPUTE_LIB + NAMES arm_compute libarm_compute + HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build" + ) + find_library(EXTERN_ACL_COMPUTE_CORE_LIB + NAMES arm_compute_core libarm_compute_core + HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build" + ) + find_library(EXTERN_ACL_COMPUTE_GRAPH_LIB + NAMES arm_compute_graph libarm_compute_graph + HINTS "${ACL_PATH}" "${ACL_PATH}/lib" "${ACL_PATH}/build" + ) + + list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_LIB}) + list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_CORE_LIB}) + list(APPEND TVM_RUNTIME_LINKER_LIBS ${EXTERN_ACL_COMPUTE_GRAPH_LIB}) + list(APPEND RUNTIME_SRCS ${ACL_CONTRIB_SRC}) + list(APPEND RUNTIME_SRCS ${ACL_API}) + message(STATUS "Build with ACL graph runtime support: " + ${EXTERN_ACL_COMPUTE_LIB} ", \n" + ${EXTERN_ACL_COMPUTE_CORE_LIB} ", \n" + ${EXTERN_ACL_COMPUTE_GRAPH_LIB}) + + # Set flag to detect ACL graph runtime support. + add_definitions(-DTVM_GRAPH_RUNTIME_ACL) +endif() diff --git a/python/tvm/relay/op/contrib/__init__.py b/python/tvm/relay/op/contrib/__init__.py index 0e1b4b024a5aa..fad7183d92987 100644 --- a/python/tvm/relay/op/contrib/__init__.py +++ b/python/tvm/relay/op/contrib/__init__.py @@ -18,5 +18,6 @@ """Contrib modules.""" from .register import get_pattern_table, register_pattern_table +from .acl import * from .dnnl import * from .coreml import * diff --git a/python/tvm/relay/op/contrib/acl.py b/python/tvm/relay/op/contrib/acl.py new file mode 100644 index 0000000000000..8207575460450 --- /dev/null +++ b/python/tvm/relay/op/contrib/acl.py @@ -0,0 +1,125 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument +"""ACL library supported operators.""" +import tvm +from tvm.relay import transform +from tvm.relay.build_module import bind_params_by_name + +from ...dataflow_pattern import wildcard, is_op, is_constant +from .register import register_pattern_table + + +def is_acl_runtime_present(): + """Check if the ACL graph runtime is present. + + Returns + ------- + ret: bool + True if present, False if not. + """ + return tvm.get_global_func("relay.op.is_acl_runtime_enabled", True) + + +def partition_for_acl(mod, params=None): + """Partition the graph greedily offloading supported + operators to ACL. + + Parameters + ---------- + mod : Module + The module to run passes on. + params : dict[str, NDArray] + Constant input parameters. 
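+        The constants are bound to the main function before partitioning.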
+ + Returns + ------- + ret : annotated and partitioned module. + """ + if params: + mod['main'] = bind_params_by_name(mod['main'], params) + + seq = tvm.transform.Sequential([transform.MergeComposite(pattern_table()), + transform.AnnotateTarget('acl'), + transform.PartitionGraph()]) + + return seq(mod) + + +@register_pattern_table("acl") +def pattern_table(): + """Get the ACL pattern table.""" + + def conv_pattern(): + """Create a convolution pattern. + + Returns + ------- + pattern : dataflow_pattern.AltPattern + Denotes the convolution pattern. + """ + pattern = is_op('nn.pad')(wildcard()) | wildcard() + pattern = is_op('nn.conv2d')(pattern, is_constant()) + pattern = pattern.optional(lambda x: is_op('nn.bias_add')(x, is_constant())) + pattern = pattern.optional(is_op('nn.relu')) + return pattern + + def check_conv(extract): + """Check conv pattern is supported by ACL.""" + call = extract + while call.op.name != "nn.conv2d": + call = call.args[0] + return conv2d(call.attrs, call.args) + + return [('acl.conv2d', conv_pattern(), check_conv)] + + +def _register_external_op_helper(op_name, supported=True): + @tvm.ir.register_op_attr(op_name, "target.acl") + def _func_wrapper(attrs, args): + return supported + + return _func_wrapper + + +_register_external_op_helper("reshape") + + +@tvm.ir.register_op_attr("nn.conv2d", "target.acl") +def conv2d(attrs, args): + """Check if the external ACL codegen for conv2d should be used.""" + + # ACL only supports group size of 1 + if attrs.groups != 1: + return False + + # ACL only supports NHWC layout + if attrs.data_layout != "NHWC": + return False + + return True + + +@tvm.ir.register_op_attr("nn.max_pool2d", "target.acl") +def max_pool2d(attrs, args): + """Check if the external ACL codegen for maxpool2d should be used.""" + + # ACL only supports NHWC layout + if attrs.layout != "NHWC": + return False + + return True diff --git a/src/relay/backend/contrib/acl/README.md b/src/relay/backend/contrib/acl/README.md new file mode 100644 index 0000000000000..111f64c2c1f28 --- /dev/null +++ b/src/relay/backend/contrib/acl/README.md @@ -0,0 +1,111 @@ + + +# Relay Arm® Compute Library Integration +Arm Compute Library (ACL) is an open source project that provides accelerated kernels for Arm CPU's +and GPU's. Currently the integration offloads operators to ACL to use hand-crafted assembler +routines in the library. By offloading select operators from a relay graph to ACL we can achieve +a performance boost on such devices. + +## Building with ACL support +The current implementation has two separate build options in cmake. The reason for this split is +because ACL cannot be used on an x86 machine. However, we still want to be able compile an ACL +runtime module on an x86 machine. + +* USE_ACL=ON/OFF - Enabling this flag will add support for compiling an ACL runtime module. +* USE_GRAPH_RUNTIME_ACL=ON/OFF/path-to-acl - Enabling this flag will allow the graph runtime to +compute the ACL offloaded functions. + +These flags can be used in different scenarios depending on your setup. For example, if you want +to compile ACL on an x86 machine and then run the module on a remote Arm device via RPC, you will +need to use USE_ACL=ON on the x86 machine and USE_GRAPH_RUNTIME_ACL=ON on the remote AArch64 +device. +## Usage +_Note:_ this may not stay up-to-date with changes to the API. +1. Create a relay graph. This may be a single operator or a whole graph. The intention is that any +relay graph can be input. 
The ACL integration will only pick supported operators to be offloaded +whilst the rest will be computed via TVM. (For this example we will use a single +max_pool2d operator). + ``` + import tvm + from tvm import relay + + data_type = "float32" + data_shape = (1, 14, 14, 512) + strides = (2, 2) + padding = (0, 0, 0, 0) + pool_size = (2, 2) + layout = "NHWC" + output_shape = (1, 7, 7, 512) + + data = relay.var('data', shape=data_shape, dtype=data_type) + out = relay.nn.max_pool2d(data, pool_size=pool_size, strides=strides, + layout=layout, padding=padding) + module = tvm.IRModule.from_expr(out) + ``` +2. Annotate and partition the graph for ACL. + ``` + module = relay.transform.AnnotateTarget("acl")(module) + module = relay.transform.PartitionGraph()(module) + ``` +3. Build the Relay graph. + ``` + target = "llvm -target=aarch64-linux-gnu -mattr=+neon" + with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]): + json, lib, params = relay.build(module, target=target) + ``` +4. Export the module. + ``` + lib_path = '~/lib_acl.so' + cross_compile = 'aarch64-linux-gnu-c++' + lib.export_library(lib_path, cc=cross_compile) + ``` + 5. Run Inference. This must be on an Arm device. If compiling on x86 device and running on aarch64 + consider using the RPC mechanism. + ``` + tvm.runtime.load_module('lib_acl.so') + gen_module = tvm.contrib.graph_runtime.create(json, lib, ctx) + + d_data = np.random.uniform(0, 1, data_shape).astype(data_type) + map_inputs = {'data': d_data} + gen_module.map_inputs(**map_inputs) + gen_module.run() + ``` + +## More examples +The example above only shows a basic example of how ACL can be used for offloading a single +Maxpool2D. If you would like to see more examples for each implemented operator and for +networks refer to the tests: `tests/python/contrib/test_acl`. Here you can modify +`infrastructure.py` to use the remote device you have setup. + +## Adding a new operator +Adding a new operator requires changes to a series of places. This section will give a hint on +what needs to be changed and where, it will not however dive into the complexities for an +individual operator. This is left to the developer. + +There are a series of files we need to make changes to: +* `python/relay/op/contrib/acl.py` In this file we define the operators we wish to offload using the +`op.register` decorator. This will mean the annotation pass recognizes this operator as ACL +offloadable. +* `src/relay/backend/contrib/acl/codegen_acl.h` Implement `Make[OpName]` method. This is where we +declare how the operator should be represented by JSON. This will be used to create the ACL module. +* `src/runtime/contrib/acl/acl_kernel.h` Implement `Create[OpName]Layer` method. This is where we +define how the JSON representation can be used to create an ACL function. We simply define how to +translate from the JSON representation to ACL API. +* `tests/python/contrib/test_acl` Add unit tests for the given operator. diff --git a/src/relay/backend/contrib/acl/acl_api.cc b/src/relay/backend/contrib/acl/acl_api.cc new file mode 100644 index 0000000000000..5e3aa9c5679ef --- /dev/null +++ b/src/relay/backend/contrib/acl/acl_api.cc @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/acl/acl_api.cc + * \brief A common JSON interface between relay and the ACL runtime module. + */ + +#include "acl_api.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace acl { + +std::pair> DeserializeSubgraph( + std::string* serialized_function) { + dmlc::MemoryStringStream mstrm(serialized_function); + dmlc::Stream* strm = &mstrm; + std::string serialized_json; + strm->Read(&serialized_json); + std::istringstream is(serialized_json); + dmlc::JSONReader reader(&is); + JSONSubGraph function; + function.Load(&reader); + std::vector constants; + size_t const_count; + strm->Read(&const_count); + for (size_t i = 0; i < const_count; i++) { + runtime::NDArray temp; + temp.Load(strm); + constants.push_back(temp); + } + return std::make_pair(function, constants); +} + +std::string SerializeSubgraph(const JSONSubGraph& subgraph, + const std::vector& constants) { + std::ostringstream os; + dmlc::JSONWriter writer(&os); + subgraph.Save(&writer); + std::string serialized_subgraph; + dmlc::MemoryStringStream mstrm(&serialized_subgraph); + dmlc::Stream* strm = &mstrm; + strm->Write(os.str()); + strm->Write(constants.size()); + for (const auto& it : constants) { + it.Save(strm); + } + return serialized_subgraph; +} + +} // namespace acl +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/contrib/acl/acl_api.h b/src/relay/backend/contrib/acl/acl_api.h new file mode 100644 index 0000000000000..60ea03e5b3fe4 --- /dev/null +++ b/src/relay/backend/contrib/acl/acl_api.h @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/acl/acl_api.h + * \brief A common JSON interface between relay and the ACL runtime module. 
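+ * \note Each serialized subgraph currently describes a single operator (JSONOp)
+ * together with its constant tensors.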
+ */ + +#ifndef TVM_RELAY_BACKEND_CONTRIB_ACL_ACL_API_H_ +#define TVM_RELAY_BACKEND_CONTRIB_ACL_ACL_API_H_ + +#include +#include +#include + +#include +#include +#include +#include + +namespace tvm { +namespace relay { +namespace contrib { +namespace acl { + +DMLC_JSON_ENABLE_ANY(std::vector, IntVector); +DMLC_JSON_ENABLE_ANY(int, Int); +DMLC_JSON_ENABLE_ANY(size_t, Size_t); +DMLC_JSON_ENABLE_ANY(std::string, String); + +/*! + * JSON interface for ACL tensor. + */ +class JSONTensor { + public: + JSONTensor() = default; + explicit JSONTensor(std::vector shape) : type("var"), shape(std::move(shape)) {} + + JSONTensor(std::string type, std::vector shape) + : type(std::move(type)), shape(std::move(shape)) {} + + void Save(dmlc::JSONWriter* writer) const { + writer->BeginObject(); + writer->WriteObjectKeyValue("type", type); + writer->WriteObjectKeyValue("shape", shape); + writer->EndObject(); + } + + void Load(dmlc::JSONReader* reader) { + dmlc::JSONObjectReadHelper helper; + helper.DeclareField("type", &type); + helper.DeclareField("shape", &shape); + helper.ReadAllFields(reader); + } + + /*! \brief The type of the tensor var/const. */ + std::string type; + /*! \brief The shape of the tensor. */ + std::vector shape; +}; + +/*! + * JSON interface for an ACL operator. + */ +class JSONOp { + public: + JSONOp() = default; + explicit JSONOp(std::string name) : name(std::move(name)) {} + + void Save(dmlc::JSONWriter* writer) const { + auto op_attrs = attrs; + op_attrs["num_inputs"] = dmlc::any(inputs.size()); + op_attrs["num_outputs"] = dmlc::any(outputs.size()); + writer->BeginObject(); + writer->WriteObjectKeyValue("name", name); + writer->WriteObjectKeyValue("inputs", inputs); + writer->WriteObjectKeyValue("outputs", outputs); + writer->WriteObjectKeyValue("attrs", op_attrs); + writer->EndObject(); + } + + void Load(dmlc::JSONReader* reader) { + dmlc::JSONObjectReadHelper helper; + helper.DeclareField("name", &name); + helper.DeclareField("inputs", &inputs); + helper.DeclareField("outputs", &outputs); + helper.DeclareField("attrs", &attrs); + helper.ReadAllFields(reader); + } + + /*! The name of the operator. */ + std::string name; + /*! The required variable inputs to the operator. */ + std::vector inputs; + /*! The required outputs to the operator. */ + std::vector outputs; + /*! The attributes of the operator e.g. padding, strides, etc. */ + std::unordered_map attrs; +}; + +/*! + * JSON interface for a series of ACL ops. + */ +class JSONSubGraph { + public: + JSONSubGraph() = default; + explicit JSONSubGraph(JSONOp op) : op(std::move(op)) {} + + void Save(dmlc::JSONWriter* writer) const { + writer->BeginObject(); + writer->WriteObjectKeyValue("node", op); + writer->EndObject(); + } + + void Load(dmlc::JSONReader* reader) { + dmlc::JSONObjectReadHelper helper; + helper.DeclareField("node", &op); + helper.ReadAllFields(reader); + } + + /*! \brief JSON op to be serialized. */ + JSONOp op; +}; + +/*! + * \brief Deserialize a function (or subgraph). The function is serialized in the + * format: Serialized JSON (using dmlc::JSONWriter), number of constants, serialized + * NDArray constants. + * + * \param serialized_function Pointer to a serialized function (or subgraph). + * \return A pair consisting of deserialized json subgraph object and deserialized + * NDArray. + */ +std::pair> DeserializeSubgraph( + std::string* serialized_function); + +/*! + * \brief Serialize a single subgraph which can be saved to disk. 
+ * + * A subgraph is serialized so that the output is as follows: + * - Serialized JSON. + * - Number of constant tensors. + * - Serialized constant tensors. + * + * \param subgraph JSON subgraph representation. + * \constants Serialized JSON constants. + */ +std::string SerializeSubgraph(const JSONSubGraph& subgraph, + const std::vector& constants); + +} // namespace acl +} // namespace contrib +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_BACKEND_CONTRIB_ACL_ACL_API_H_ diff --git a/src/relay/backend/contrib/acl/codegen.cc b/src/relay/backend/contrib/acl/codegen.cc new file mode 100644 index 0000000000000..1c61a6b09fce4 --- /dev/null +++ b/src/relay/backend/contrib/acl/codegen.cc @@ -0,0 +1,287 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/acl/codegen_acl.cc + * \brief Implementation of the Relay -> ACL JSON schema compiler. + */ +#include +#include + +#include "../../utils.h" +#include "codegen_acl.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace acl { + +void CodegenACL::VisitLeaf(const Expr& expr) { + if (expr->IsInstance()) { + const auto* constant_node = expr.as(); + this->constants_.push_back(constant_node->data); + } else if (!expr->IsInstance()) { + // Don't enter functions + MixedModeVisitor::VisitLeaf(expr); + } +} + +void CodegenACL::VisitExpr_(const CallNode* node) { + Call call = GetRef(node); + if (this->layer_table_.find(call) == this->layer_table_.end()) { + for (const auto& arg : call->args) { + this->VisitExpr(arg); + } + // Determine call -> ACL mapping + JSONOp layer; + if (IsAclFunc(node, "acl.conv2d") || backend::IsOp(node, "nn.conv2d")) { + layer = MakeConvolutionOp(call); + } else if (backend::IsOp(node, "nn.max_pool2d")) { + layer = MakeMaxPool2DOp(call); + } else if (backend::IsOp(node, "reshape")) { + layer = MakeReshapeOp(call); + } else { + LOG(FATAL) << "Unsupported op: " << AsText(node->op, false); + } + this->layer_table_[call] = layer; + } +} + +runtime::Module CodegenACL::CreateRuntimeModule(const ObjectRef& ref) { + std::vector> serialized_functions; + if (ref->IsInstance()) { + IRModule mod; + Function func = Downcast(ref); + auto name_node = func->GetAttr(tvm::attr::kGlobalSymbol); + CHECK(name_node.defined()) << "Failed to retrieve external symbol"; + mod->Add(GlobalVar(name_node.value()), func); + mod = this->PreProcessModule(mod); + for (const auto& it : mod->functions) { + this->SerializeFunction(it.second, &serialized_functions); + } + } else { + LOG(FATAL) << "The input ref is expected to be a Relay function."; + } + std::string data; + dmlc::MemoryStringStream fs(&data); + dmlc::SeekStream* strm = &fs; + strm->Write(serialized_functions.size()); + for (const auto& it : serialized_functions) { + 
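+    // Each entry is the function's external symbol followed by its serialized
+    // JSON subgraph and constant tensors.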
strm->Write(it.first); + strm->Write(it.second); + } + strm->Seek(0); + std::string make_acl_module = "runtime.module.loadbinary_acl"; + auto pf = tvm::runtime::Registry::Get(make_acl_module); + if (pf) { + return (*pf)(strm); + } else { + return runtime::Module(); + } +} + +JSONSubGraph CodegenACL::CreateJSONSubgraph(const Function& func) { + Expr body = func->body; + this->layer_table_.clear(); + this->constants_.clear(); + this->VisitExpr(body); + std::vector ops; + for (const auto& it : this->layer_table_) { + ops.push_back(it.second); + } + CHECK_EQ(layer_table_.size(), 1) << "ACL codegen expects only a single op per function."; + return JSONSubGraph(ops[0]); +} + +void CodegenACL::SerializeFunction( + const ObjectRef& ref, std::vector>* serialized_functions) { + Function func = Downcast(ref); + JSONSubGraph subgraph = this->CreateJSONSubgraph(func); + const auto name_node = func->GetAttr(tvm::attr::kGlobalSymbol); + CHECK(name_node != "") << "Fail to retrieve external symbol"; + std::string serialized_pair = SerializeSubgraph(subgraph, this->constants_); + serialized_functions->emplace_back(name_node.value(), serialized_pair); +} + +IRModule CodegenACL::PreProcessModule(const IRModule& mod) { + IRModule preprocessed_module; + tvm::Map> desired_layouts = { + {"nn.conv2d", {String("NHWC"), String("OHWI")}}}; + preprocessed_module = transform::ConvertLayout(desired_layouts)(mod); + preprocessed_module = transform::FoldConstant()(preprocessed_module); + return preprocessed_module; +} + +JSONOp CodegenACL::MakeConvolutionOp(const Call& call) { + JSONOp op("conv2d"); + const CallNode* pad = nullptr; + const CallNode* conv; + const CallNode* bias = nullptr; + bool has_activation = false; + if (call->op->IsInstance()) { + Expr composite_conv = GetCompositeExpr(call); + // Unpack composite function + const auto* current_call = composite_conv.as(); + if (backend::IsOp(current_call, "nn.relu")) { + has_activation = true; + current_call = current_call->args[0].as(); + } + if (backend::IsOp(current_call, "nn.bias_add")) { + bias = current_call; + current_call = current_call->args[0].as(); + } + CHECK(backend::IsOp(current_call, "nn.conv2d")); + conv = current_call; + if (!current_call->args.empty() && current_call->args[0]->IsInstance()) { + current_call = current_call->args[0].as(); + if (backend::IsOp(current_call, "nn.pad")) { + pad = current_call; + } + } + } else { + conv = call.as(); + } + const auto* conv_attr = conv->attrs.as(); + CHECK(conv_attr); + CHECK(conv_attr->kernel_layout == "OHWI") + << "Kernel layout must be OHWI, has the module been pre-processed correctly?"; + if (pad) { + op.inputs.push_back(MakeJSONTensor(pad->args[0])); + } else { + op.inputs.push_back(MakeJSONTensor(conv->args[0])); + } + op.inputs.push_back(MakeJSONConstTensor(conv->args[1])); + op.outputs.push_back(MakeJSONTensor(GetRef(conv))); + if (bias) { + op.inputs.push_back(MakeJSONConstTensor(bias->args[1])); + } + // It seems there are two different methods for padding a convolution: + // - using nn.pad operator before convolution + // - using conv2d_attrs to add padding + // + // Cover both cases here. 
+ std::vector padding; + if (pad) { + const auto* pad_attr = pad->attrs.as(); + CHECK(pad_attr); + padding = GetPadVector(pad_attr->pad_width); + } else { + padding = GetPadVector(conv_attr->padding); + } + op.attrs["padding"] = padding; + op.attrs["groups"] = conv_attr->groups; + op.attrs["strides"] = ToVector(conv_attr->strides); + if (has_activation) op.attrs["activation_type"] = std::string("relu"); + return op; +} + +JSONOp CodegenACL::MakeMaxPool2DOp(const Call& call) { + JSONOp op("max_pool"); + const auto* attr = call->attrs.as(); + CHECK(attr); + op.inputs.push_back(MakeJSONTensor(call->args[0])); + op.outputs.push_back(MakeJSONTensor(call)); + op.attrs["padding"] = GetPadVector(attr->padding); + op.attrs["strides"] = ToVector(attr->strides); + op.attrs["pooling_type"] = std::string("max"); + op.attrs["pool_size"] = ToVector(attr->pool_size); + return op; +} + +JSONOp CodegenACL::MakeReshapeOp(const Call& call) { + JSONOp op("reshape"); + const auto* attr = call->attrs.as(); + CHECK(attr); + op.inputs.push_back(MakeJSONTensor(call->args[0])); + op.outputs.push_back(MakeJSONTensor(call)); + return op; +} + +JSONTensor CodegenACL::MakeJSONTensor(const Expr& expr) { + const auto* ttnode = expr->checked_type().as(); + CHECK(ttnode); + std::vector shape = ToVector(ttnode->shape); + return JSONTensor("var", shape); +} + +JSONTensor CodegenACL::MakeJSONConstTensor(const Expr& expr) { + const auto* ttnode = expr->checked_type().as(); + CHECK(ttnode); + std::vector shape = ToVector(ttnode->shape); + VisitExpr(expr); + return JSONTensor("const", shape); +} + +bool CodegenACL::IsAclFunc(const CallNode* call, const std::string& op_name) const { + if (call->op->IsInstance()) { + Function func = Downcast(call->op); + CHECK(func.defined()); + auto name_node = func->GetAttr(attr::kComposite); + return name_node.value() == op_name; + } + return false; +} + +Expr CodegenACL::GetCompositeExpr(const Call& call) { + Function composite_function = Downcast(call->op); + Expr composite_expr = composite_function->body; + CHECK(composite_expr->IsInstance()); + return composite_expr; +} + +std::vector CodegenACL::ToVector(const Array& array) { + std::vector stl_vector; + for (auto it : array) { + const auto* val = it.as(); + CHECK(val); + stl_vector.push_back(val->value); + } + return stl_vector; +} + +std::vector CodegenACL::GetPadVector(const Array>& pad) { + // TVM nn.pad: top, bottom, left, right -> ACL Pad: left, right, top, bottom + auto acl_pad = {pad[2][0], pad[2][1], pad[1][0], pad[1][1]}; + return ToVector(acl_pad); +} + +std::vector CodegenACL::GetPadVector(const Array& pad) { + Array acl_pad; + switch (pad.size()) { + case 1: + acl_pad = {pad[0], pad[0], pad[0], pad[0]}; + break; + case 2: + // TVM Pad: height, width -> ACL Pad: left, right, top, bottom + acl_pad = {pad[1], pad[1], pad[0], pad[0]}; + break; + case 4: + // TVM Pad: top, left, bottom, right -> ACL Pad: left, right, top, bottom + acl_pad = {pad[1], pad[3], pad[0], pad[2]}; + break; + default: + LOG(FATAL) << "Unsupported padding dimensions"; + } + return ToVector(acl_pad); +} + +} // namespace acl +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/contrib/acl/codegen_acl.h b/src/relay/backend/contrib/acl/codegen_acl.h new file mode 100644 index 0000000000000..23efb09521b2a --- /dev/null +++ b/src/relay/backend/contrib/acl/codegen_acl.h @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/acl/codegen_acl.h + * \brief The Relay -> ACL JSON schema compiler. + */ + +#ifndef TVM_RELAY_BACKEND_CONTRIB_ACL_CODEGEN_ACL_H_ +#define TVM_RELAY_BACKEND_CONTRIB_ACL_CODEGEN_ACL_H_ + +#include +#include +#include + +#include +#include +#include +#include + +#include "acl_api.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace acl { + +/*! + * \brief Generates an ACLModule from a relay expression. This "compilation" + * does not require ACL since the actual conversion using ACL APIs is + * deferred until creation of the runtime. This step simply serializes the + * relay program into a JSON string. + */ +class CodegenACL : public MixedModeVisitor { + public: + CodegenACL() = default; + void VisitExpr_(const CallNode* node) final; + void VisitLeaf(const Expr& expr) final; + + /*! + * \brief Create a runtime module for ACL. + * + * This consists of a series of "serialized functions" which each represent a + * subgraph to be computed by ACL and will each be executed independently from + * one another. Each function consists of serialized JSON describing the subgraph + * and serialized constant tensors. + * + * \note The ACL runtime module only currently supports a single operator per + * subgraph currently. + * + * \param ref The ext_func Relay expression/module to be executed using extern ops. + * \return A runtime module. + */ + runtime::Module CreateRuntimeModule(const ObjectRef& ref); + + /*! + * \brief Create a JSON representation of a subgraph. + * + * \param func The function to be represented. + * \return A JSON representation of the function. + */ + JSONSubGraph CreateJSONSubgraph(const Function& func); + + private: + /*! + * \brief Serialize a single subgraph which can be saved to disk. + * + * A subgraph is serialized so that the output is as follows. + * - Serialized JSON. + * - Number of constant tensors. + * - Serialized constant tensors. + * + * \param ref Reference to the function to be serialized. + * \param serialized_functions A vector of serialized functions to add to. + */ + void SerializeFunction(const ObjectRef& ref, + std::vector>* serialized_functions); + + /*! + * \brief Pre-process a module containing functions ready for ACL codegen. + * + * For now we enforce OHWI kernel layout and fold the transforms away. + * + * \param mod The module to be pre-processed. + * \return The processed module. + */ + IRModule PreProcessModule(const IRModule& mod); + + /*! + * \brief Create a JSON representation of an operator. + * + * \param call The call to be represented. + * \return A JSON representation of a specific operator. + */ + JSONOp MakeConvolutionOp(const Call& call); + static JSONOp MakeMaxPool2DOp(const Call& call); + static JSONOp MakeReshapeOp(const Call& call); + + /*! 
+ * \brief Make a JSON representation of a (constant)tensor. + * + * \param expr Expression of a tensor to be represented. + * \return A JSON representation of a tensor. + */ + static JSONTensor MakeJSONTensor(const Expr& expr); + JSONTensor MakeJSONConstTensor(const Expr& expr); + + /*! + * \brief Check whether CallNode is a composite function and has the same + * op_name. + * + * \param call The current call node. + * \param op_name The expected name of the call node to check. + * \return True if the call node is composite and has the same name as + * op_name, False otherwise. + */ + bool IsAclFunc(const CallNode* call, const std::string& op_name) const; + + /*! + * \brief Get composite expression from call node. + * + * \param call The call node to get expression from. + * \return Expression for composite function. + */ + static Expr GetCompositeExpr(const Call& call); + + /*! + * \brief Convert a relay array to std::vector. + * + * \param array A relay array to be converted. + * \return std::vector. + */ + static std::vector ToVector(const Array& array); + + /*! + * \brief Create a padding vector compatible with ACL. + * + * Currently TVM has many ways to pad a an operator, so each method is taken care of here. + * + * \param pad Padding array. + * \return ACL compatible padding vector. + */ + static std::vector GetPadVector(const Array>& pad); + static std::vector GetPadVector(const Array& pad); + + /*! \brief A vector of constants to be serialized after the JSON representation is constructed. */ + std::vector constants_; + /*! \brief A look-up table from Expr to JSONOp. */ + std::map layer_table_; +}; + +/*! + * \brief The external ACL compiler/codegen tool. It takes a Relay + * expression/module and compiles it into a runtime module. + */ +runtime::Module ACLCompiler(const ObjectRef& ref) { + CodegenACL acl_codegen; + return acl_codegen.CreateRuntimeModule(ref); +} + +TVM_REGISTER_GLOBAL("relay.ext.acl").set_body_typed(ACLCompiler); + +/*! + * \brief Check whether ACL graph runtime is used. + * \return True if ACL graph runtime is enabled, False if not. + */ +inline constexpr bool IsACLRuntimeEnabled() { +#if TVM_GRAPH_RUNTIME_ACL + return true; +#else + return false; +#endif +} + +TVM_REGISTER_GLOBAL("relay.op.is_acl_runtime_enabled").set_body_typed(IsACLRuntimeEnabled); + +} // namespace acl +} // namespace contrib +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_BACKEND_CONTRIB_ACL_CODEGEN_ACL_H_ diff --git a/src/runtime/contrib/acl/acl_allocator.cc b/src/runtime/contrib/acl/acl_allocator.cc new file mode 100644 index 0000000000000..b72ec9552130c --- /dev/null +++ b/src/runtime/contrib/acl/acl_allocator.cc @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file src/runtime/contrib/acl/acl_allocator.cc + * \brief ACL Allocator implementation that requests memory from TVM. + */ + +#include "acl_allocator.h" + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +void* ACLAllocator::allocate(size_t size, size_t alignment) { + CHECK_GT(size, 0) << "Cannot allocate size less than or equal to zero"; + return this->device_api_->AllocWorkspace(this->ctx_, size, {}); +} + +void ACLAllocator::free(void* ptr) { this->device_api_->FreeWorkspace(this->ctx_, ptr); } + +std::unique_ptr ACLAllocator::make_region(size_t size, size_t alignment) { + return acl::support::cpp14::make_unique(size, alignment); +} + +ACLMemoryRegion::ACLMemoryRegion(size_t size, size_t alignment) : IMemoryRegion(size) { + CHECK_GT(size, 0) << "Cannot allocate size less than or equal to zero"; + this->ptr_ = this->device_api_->AllocDataSpace(this->ctx_, size, alignment, {}); +} + +ACLMemoryRegion::ACLMemoryRegion(void* ptr, size_t size) + : IMemoryRegion(size), is_subregion_(true) { + if (size != 0) { + this->ptr_ = ptr; + } +} + +ACLMemoryRegion::~ACLMemoryRegion() { + if (!is_subregion_) { + this->device_api_->FreeDataSpace(this->ctx_, this->ptr_); + } +} + +std::unique_ptr ACLMemoryRegion::extract_subregion(size_t offset, size_t size) { + if (this->ptr_ != nullptr && (offset < _size) && (_size - offset >= size)) { + return acl::support::cpp14::make_unique( + static_cast(this->ptr_) + offset, size); + } else { + return nullptr; + } +} + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_allocator.h b/src/runtime/contrib/acl/acl_allocator.h new file mode 100644 index 0000000000000..d608645947891 --- /dev/null +++ b/src/runtime/contrib/acl/acl_allocator.h @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/acl/acl_allocator.h + * \brief ACL Allocator implementation that requests memory from TVM. + */ + +#ifndef TVM_RUNTIME_CONTRIB_ACL_ACL_ALLOCATOR_H_ +#define TVM_RUNTIME_CONTRIB_ACL_ACL_ALLOCATOR_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +namespace acl = arm_compute; + +/*! + * \brief Override ACL memory allocator and replace with TVM workspace based allocation. + */ +class ACLAllocator : public arm_compute::IAllocator { + public: + ACLAllocator() = default; + + /*! + * \brief Allocate bytes to ACL runtime. + * + * Specific implementation requests memory from TVM using their device api. + * + * \param size Size to allocate. + * \param alignment Alignment that the returned pointer should comply with. + * \return A pointer to the allocated memory. 
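+   * \note The requested alignment is currently not forwarded to TVM's workspace
+   * allocator.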
+ */ + void* allocate(size_t size, size_t alignment) override; + + /*! + * \brief Free memory from ACL runtime. + * + * \param ptr Pointer to workspace to free. + */ + void free(void* ptr) override; + + /*! + * \brief Create self-managed memory region. + * + * \param size Size of the memory region. + * \param alignment Alignment of the memory region. + * \return The memory region object. + */ + std::unique_ptr make_region(size_t size, size_t alignment) override; + + private: + /*! \brief Always allocate data in the context of the current CPU. */ + const TVMContext ctx_{kDLCPU, 0}; + /*! \brief Device API which allows requests for memory from TVM. */ + runtime::DeviceAPI* device_api_ = runtime::DeviceAPI::Get(ctx_); +}; + +/*! + * \brief Memory region that can request TVM memory for ACL to use. + */ +class ACLMemoryRegion : public arm_compute::IMemoryRegion { + public: + ACLMemoryRegion(size_t size, size_t alignment); + ACLMemoryRegion(void* ptr, size_t size); + + ~ACLMemoryRegion() override; + + /*! \brief Prevent instances of this class from being copied (As this class contains + * pointers). */ + ACLMemoryRegion(const ACLMemoryRegion&) = delete; + /*! \brief Default move constructor. */ + ACLMemoryRegion(ACLMemoryRegion&&) = default; + /*! \brief Prevent instances of this class from being copied (As this class + * contains pointers) */ + ACLMemoryRegion& operator=(const ACLMemoryRegion&) = delete; + /*! Default move assignment operator. */ + ACLMemoryRegion& operator=(ACLMemoryRegion&&) = default; + + void* buffer() override { return this->ptr_; } + + const void* buffer() const override { return this->ptr_; } + + /*! + * \brief Extract a sub-region from the memory. + * + * \warning Ownership is maintained by the parent memory, + * while a wrapped raw memory region is returned by this function. + * Thus parent memory should not be released before this. + * + * \param offset Offset to the region. + * \param size Size of the region. + * \return A wrapped memory sub-region with no ownership of the + * underlying memory. + */ + std::unique_ptr extract_subregion(size_t offset, size_t size) override; + + private: + /*! \brief Points to a region of memory allocated by TVM. */ + void* ptr_; + /*! \brief A subregion doesn't manage TVM memory so we don't need to free it. */ + bool is_subregion_ = false; + /*! \brief Always allocate data in the context of the current CPU. */ + const TVMContext ctx_{kDLCPU, 0}; + /*! \brief Device API which allows requests for memory from TVM. */ + runtime::DeviceAPI* device_api_ = runtime::DeviceAPI::Get(ctx_); +}; + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_CONTRIB_ACL_ACL_ALLOCATOR_H_ diff --git a/src/runtime/contrib/acl/acl_kernel.cc b/src/runtime/contrib/acl/acl_kernel.cc new file mode 100644 index 0000000000000..a87b1b525e2e5 --- /dev/null +++ b/src/runtime/contrib/acl/acl_kernel.cc @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/acl/acl_kernel.cc + * \brief TVM compatible wrappers for ACL kernels. + */ + +#include "acl_kernel.h" + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +CachedLayer::CachedLayer(const api::JSONSubGraph& function, const std::vector& constants, + ACLAllocator* allocator, + const std::shared_ptr& mm) + : constants_(constants), allocator_(allocator) { + api::JSONOp op = function.op; + // Make tensors + int const_tensor_idx = 0; + for (const auto& it : op.inputs) { + if (it.type == "const") { + this->function_.const_inputs.push_back(MakeTensor(it, constants[const_tensor_idx++]->data)); + } else if (it.type == "var") { + this->function_.inputs.push_back(MakeTensor(it)); + } else { + LOG(FATAL) << "Unsupported tensor type"; + } + } + for (const auto& it : op.outputs) { + this->function_.outputs.push_back(MakeTensor(it)); + } + // Create layer + if (op.name == "conv2d") { + CreateConvolution2DLayer(&this->function_, function.op, mm); + this->is_mm_ = true; + } else if (op.name == "max_pool") { + CreateMaxPoolLayer(&this->function_, function.op); + } else if (op.name == "reshape") { + CreateReshapeLayer(&this->function_, function.op); + } else { + LOG(FATAL) << "Operator not yet supported"; + } + // Prepare function + this->function_.function->prepare(); +} + +bool CachedLayer::Inference(const std::vector& inputs, + const std::vector& outputs) { + for (size_t i = 0; i < inputs.size(); i++) { + CheckACLError(function_.inputs[i].allocator()->import_memory(inputs[i]->data)); + } + for (size_t i = 0; i < outputs.size(); i++) { + CheckACLError(function_.outputs[i].allocator()->import_memory(outputs[i]->data)); + } + + this->function_.function->run(); + return true; +} + +size_t CachedLayer::GetNumInputs() const { return this->function_.inputs.size(); } + +void CachedLayer::CreateConvolution2DLayer(CacheItems* cache, const api::JSONOp& params, + const std::shared_ptr& mm) { + auto padding = dmlc::get>(params.attrs.at("padding")); + auto strides = dmlc::get>(params.attrs.at("strides")); + auto groups = dmlc::get(params.attrs.at("groups")); + + CHECK(groups == 1) << "ACL NEON Convolution only supports group size of 1"; + + acl::PadStrideInfo pad_stride_info = + acl::PadStrideInfo(strides[0], strides[1], padding[0], padding[1], padding[2], padding[3], + acl::DimensionRoundingType::FLOOR); + acl::ActivationLayerInfo act_info = acl::ActivationLayerInfo(); + if (params.attrs.find("activation_type") != params.attrs.end()) { + auto activation_function = dmlc::get(params.attrs.at("activation_type")); + + if (activation_function == "relu") { + act_info = acl::ActivationLayerInfo(acl::ActivationLayerInfo::ActivationFunction::RELU); + } else { + LOG(FATAL) << "Unsupported activation function"; + } + } + + auto function = std::make_shared(mm); + function->configure(&cache->inputs[0], &cache->const_inputs[0], + cache->const_inputs.size() > 1 ? 
&cache->const_inputs[1] : nullptr, + &cache->outputs[0], pad_stride_info, acl::WeightsInfo(), acl::Size2D(1U, 1U), + act_info); + + cache->function = function; +} + +void CachedLayer::CreateMaxPoolLayer(CacheItems* cache, const api::JSONOp& params) { + auto padding = dmlc::get>(params.attrs.at("padding")); + auto strides = dmlc::get>(params.attrs.at("strides")); + auto pool_size = dmlc::get>(params.attrs.at("pool_size")); + auto pooling_type = dmlc::get(params.attrs.at("pooling_type")); + + acl::PoolingType pool_type; + if (pooling_type == "max") { + pool_type = acl::PoolingType::MAX; + } else { + LOG(FATAL) << "Pooling type not supported"; + } + + acl::PadStrideInfo pad_stride_info = + acl::PadStrideInfo(strides[0], strides[1], padding[0], padding[1], padding[2], padding[3], + acl::DimensionRoundingType::FLOOR); + acl::PoolingLayerInfo pool_info = acl::PoolingLayerInfo( + pool_type, acl::Size2D(pool_size[0], pool_size[1]), acl::DataLayout::NHWC, pad_stride_info); + + auto function = std::make_shared(); + function->configure(&cache->inputs[0], &cache->outputs[0], pool_info); + + cache->function = function; +} + +void CachedLayer::CreateReshapeLayer(CacheItems* cache, const api::JSONOp& params) { + auto function = std::make_shared(); + function->configure(&cache->inputs[0], &cache->outputs[0]); + + cache->function = function; +} + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_kernel.h b/src/runtime/contrib/acl/acl_kernel.h new file mode 100644 index 0000000000000..8ab8eaf229109 --- /dev/null +++ b/src/runtime/contrib/acl/acl_kernel.h @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/acl/acl_kernel.h + * \brief Use ACL library kernels, we create an interface to these. + */ + +#ifndef TVM_RUNTIME_CONTRIB_ACL_ACL_KERNEL_H_ +#define TVM_RUNTIME_CONTRIB_ACL_ACL_KERNEL_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../../../relay/backend/contrib/acl/acl_api.h" +#include "acl_allocator.h" +#include "acl_utils.h" + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +namespace api = relay::contrib::acl; +namespace acl = arm_compute; + +/*! + * \brief ACL objects we cache in order to avoid needing to construct + * a new layer each time. + */ +struct CacheItems { + std::shared_ptr function; + std::vector inputs; + std::vector const_inputs; + std::vector outputs; +}; + +/*! + * \brief A cached ACL layer containing a single ACL function. + */ +class CachedLayer { + public: + /*! + * \brief Create an ACL layer from a JSON representation. 
Also prepare + * the layer for execution - this will perform actions such as pre- + * transposing of weights. + * + * \note The naming suggests a subgraph directly maps to a layer. + * In general this is not true, but since ACL only expects subgraphs + * consisting of a single op it is. + * + * \param function A JSON representation of a subgraph. + * \param constants The constants used in the subgraph. + * \param allocator ACL can request memory from TVM. + */ + CachedLayer(const api::JSONSubGraph& function, const std::vector& constants, + ACLAllocator* allocator, const std::shared_ptr& mm); + + /*! + * \brief Run inference on the ACL layer. + * + * \param inputs The inputs for the layer. + * \param outputs The outputs for the layer. + * \return True if success, False if not successful. + */ + bool Inference(const std::vector& inputs, const std::vector& outputs); + + /*! + * \brief Get the number of inputs the layer takes. + * + * \return Number of inputs. + */ + size_t GetNumInputs() const; + + /*! + * \brief Check if the layer requires working memory to be allocated. + * + * \return True if it does, False if not. + */ + bool IsMemoryManaged() const { return this->is_mm_; } + + private: + /*! \brief Constant tensors used in the layer. */ + std::vector constants_; + /*! \brief Cache ACL function and tensors for execution. */ + CacheItems function_; + /*! \brief ACL Allocator to request auxiliary memory from TVM. */ + ACLAllocator* allocator_; + /*! \brief Check if the function requires working memory to be allocated. */ + bool is_mm_ = false; + + /*! \brief Create individual ACL layer. */ + static void CreateConvolution2DLayer(CacheItems* cache, const api::JSONOp& params, + const std::shared_ptr& mm); + static void CreateMaxPoolLayer(CacheItems* cache, const api::JSONOp& params); + static void CreateReshapeLayer(CacheItems* cache, const api::JSONOp& params); +}; + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_CONTRIB_ACL_ACL_KERNEL_H_ diff --git a/src/runtime/contrib/acl/acl_runtime.cc b/src/runtime/contrib/acl/acl_runtime.cc new file mode 100644 index 0000000000000..1c372fe2c7e01 --- /dev/null +++ b/src/runtime/contrib/acl/acl_runtime.cc @@ -0,0 +1,233 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include +#include + +#include + +#include "../../../relay/backend/contrib/acl/acl_api.h" +#include "../../file_util.h" + +#ifdef TVM_GRAPH_RUNTIME_ACL +#include +#include +#include + +#include "acl_allocator.h" +#include "acl_kernel.h" +#endif + +namespace tvm { +namespace runtime { + +namespace api = relay::contrib::acl; + +class ACLModule : public ModuleNode { + public: + /*! + * \brief The ACL runtime module. 
Deserialize the provided functions + * on creation and store in the layer cache. + * + * \param serialized_graphs A vector of (external symbol, serialized JSON subgraph) pairs. + */ + explicit ACLModule(const std::vector>& serialized_functions) { +#ifdef TVM_GRAPH_RUNTIME_ACL + auto lifetime_mgr = std::make_shared(); + auto pool_mgr = std::make_shared(); + auto mm = std::make_shared(lifetime_mgr, pool_mgr); + int num_pools = 0; +#endif + + for (const auto& it : serialized_functions) { + std::string serialized_function = it.second; + auto ds = api::DeserializeSubgraph(&serialized_function); + this->deserialized_functions_.emplace_back(it.first, ds); + +#ifdef TVM_GRAPH_RUNTIME_ACL + this->subgraph_cache_[it.first] = + std::make_shared(ds.first, ds.second, &this->allocator_, mm); + if (this->subgraph_cache_[it.first]->IsMemoryManaged()) num_pools++; +#endif + } +#ifdef TVM_GRAPH_RUNTIME_ACL + // Allocate working memory for layers. + if (num_pools > 0) mm->populate(this->allocator_, num_pools); +#endif + } + + /*! + * \brief Get a PackedFunc from the ACL module. + * + * \param name The name of the function. + * \param sptr_to_self The ObjectPtr that points to this module node. + * \return The function pointer when it is found, otherwise, PackedFunc(nullptr). + */ + PackedFunc GetFunction(const std::string& name, const ObjectPtr& sptr_to_self) final { +#ifdef TVM_GRAPH_RUNTIME_ACL + if (this->subgraph_cache_.find(name) != this->subgraph_cache_.end()) { + return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) { + *rv = tvm::runtime::ACLModule::Inference(args, this->subgraph_cache_[name].get()); + }); + } +#endif + return PackedFunc(nullptr); + } + + /*! + * \brief The type key of the module. + * + * \return module type key. + */ + const char* type_key() const override { return "acl"; } + + /*! + * \brief Unpack inputs and outputs and run inference on a given layer. + * + * \param args Access inputs and outputs. + * \param function The layer to execute inference on. + * \return Status of inference. + */ +#ifdef TVM_GRAPH_RUNTIME_ACL + static bool Inference(tvm::runtime::TVMArgs args, contrib::acl::CachedLayer* function) { + // Unpack parameters + int argc = 0; + std::vector inputs; + for (size_t i = 0; i < function->GetNumInputs(); i++) { + inputs.push_back(args[argc++]); + } + std::vector outputs; + for (; argc < args.size(); argc++) { + outputs.push_back(args[argc]); + } + return function->Inference(inputs, outputs); + } +#endif + + /*! + * \brief Save a compiled network to a binary stream, which can then be + * serialized to disk. + * + * \param stream The stream to save the binary. + */ + void SaveToBinary(dmlc::Stream* stream) final { + stream->Write(this->deserialized_functions_.size()); + for (const auto& it : this->deserialized_functions_) { + stream->Write(it.first); + std::pair> subgraph_pair = it.second; + std::string serialized_function = + api::SerializeSubgraph(subgraph_pair.first, subgraph_pair.second); + stream->Write(serialized_function); + } + } + + /*! + * \brief Load a compiled network from stream. + * + * \param strm The binary stream to load. + * \return The created ACL module. 
+ */ + static Module LoadFromBinary(void* strm) { + auto stream = static_cast(strm); + size_t func_count; + stream->Read(&func_count); + std::vector> serialized_functions; + for (unsigned int i = 0; i < func_count; i++) { + std::string ext_symbol; + std::string serialized_function; + stream->Read(&ext_symbol); + stream->Read(&serialized_function); + serialized_functions.emplace_back(std::make_pair(ext_symbol, serialized_function)); + } + auto n = make_object(serialized_functions); + return Module(n); + } + + /*! + * \brief Save a module to a specified path. + * + * \param path Where to save the serialized module. + * \param format The format of the file. + */ + void SaveToFile(const std::string& path, const std::string& format) override { + std::string data; + dmlc::MemoryStringStream writer(&data); + dmlc::SeekStream* strm = &writer; + SaveToBinary(strm); + SaveBinaryToFile(path, data); + } + + /*! + * \brief Create a module from a file. + * + * \param path The path of the file containing the serialized module. + * \return The created ACL module. + */ + static Module LoadFromFile(const std::string& path) { + std::string data; + LoadBinaryFromFile(path, &data); + dmlc::MemoryStringStream reader(&data); + return LoadFromBinary(&reader); + } + + /*! + * \brief Get the JSON generated by codegen. + * + * \param format the format to return (only JSON for the time being) + * \return A string of JSON. + */ + std::string GetSource(const std::string& format) override { + std::ostringstream os; + dmlc::JSONWriter writer(&os); + writer.BeginObject(); + for (const auto& it : deserialized_functions_) { + writer.WriteObjectKeyValue(it.first, it.second.first); + } + writer.EndObject(); + return os.str(); + } + + private: + /* \brief A vector of (external symbol, serialized JSON subgraph) pairs. */ + std::vector>>> + deserialized_functions_; + +#ifdef TVM_GRAPH_RUNTIME_ACL + /* \brief A map between ext_symbols (function names) and an ACL subgraph. + * \note Currently only a single op per subgraph is supported. Hence mapping to + * cached layer.*/ + std::map> subgraph_cache_; + /*! \brief Allow ACL functions to request auxiliary memory from TVM. */ + contrib::acl::ACLAllocator allocator_; +#endif +}; + +TVM_REGISTER_GLOBAL("runtime.module.loadfile_acl").set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = ACLModule::LoadFromFile(args[0]); +}); + +TVM_REGISTER_GLOBAL("runtime.module.loadbinary_acl").set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = ACLModule::LoadFromBinary(args[0]); +}); + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_utils.cc b/src/runtime/contrib/acl/acl_utils.cc new file mode 100644 index 0000000000000..6e29cc384d404 --- /dev/null +++ b/src/runtime/contrib/acl/acl_utils.cc @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/acl/acl_utils.cc + * \brief Utils and common functions for the interface. + */ + +#include "acl_utils.h" + +#include +#include + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +void CheckACLError(arm_compute::Status status) { + CHECK(status.error_code() == arm_compute::ErrorCode::OK) << "ACL: " << status.error_description(); +} + +acl::Tensor MakeTensor(const api::JSONTensor& tensor_rep, void* data) { + acl::Tensor tensor; + acl::TensorInfo info = MakeTensorInfo(tensor_rep); + tensor.allocator()->init(info); + if (data != nullptr) { + CheckACLError(tensor.allocator()->import_memory(data)); + } + return tensor; +} + +acl::TensorInfo MakeTensorInfo(const api::JSONTensor& tensor_rep) { + return acl::TensorInfo(MakeTensorShape(tensor_rep.shape), 1, acl::DataType::F32, + acl::DataLayout::NHWC); +} + +arm_compute::TensorShape MakeTensorShape(const std::vector& shape) { + arm_compute::TensorShape acl_shape; + for (unsigned int i = shape.size(); i > 0; --i) { + acl_shape.set(shape.size() - i, shape[i - 1]); + } + return acl_shape; +} + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/acl/acl_utils.h b/src/runtime/contrib/acl/acl_utils.h new file mode 100644 index 0000000000000..111121d48308e --- /dev/null +++ b/src/runtime/contrib/acl/acl_utils.h @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/contrib/acl/acl_utils.h + * \brief Utils and common functions for the interface. + */ + +#ifndef TVM_RUNTIME_CONTRIB_ACL_ACL_UTILS_H_ +#define TVM_RUNTIME_CONTRIB_ACL_ACL_UTILS_H_ + +#include +#include + +#include + +#include "../../../relay/backend/contrib/acl/acl_api.h" + +namespace tvm { +namespace runtime { +namespace contrib { +namespace acl { + +namespace api = relay::contrib::acl; +namespace acl = arm_compute; + +/*! + * \brief Check if there are any errors from acl and forward them to TVM. + * + * \param status status of called function. + * + * Status values: + * - 0 => OK + * - 1 => RUNTIME_ERROR + * - 2 => UNSUPPORTED_EXTENSION_USE + */ +void CheckACLError(acl::Status status); + +/*! + * \brief Make an acl tensor from JSON tensor representation. + * + * \param tensor_rep A JSON tensor representation. + * \param data (optional) Initialize the tensor with memory. + * \return arm_compute::Tensor. + */ +acl::Tensor MakeTensor(const api::JSONTensor& tensor_rep, void* data = nullptr); + +/*! + * \brief Make an acl tensor info object from JSON tensor + * representation. + * + * \param tensor_rep A JSON tensor representation. 
+ * \return arm_compute::TensorInfo. + */ +acl::TensorInfo MakeTensorInfo(const api::JSONTensor& tensor_rep); + +/*! + * \brief Convert vector object to acl TensorShape. + * \note This requires reversing the given vector. + * + * \param shape The shape of the tensor as a vector. + * \return acl TensorShape. + */ +acl::TensorShape MakeTensorShape(const std::vector& shape); + +} // namespace acl +} // namespace contrib +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_CONTRIB_ACL_ACL_UTILS_H_ diff --git a/tests/python/contrib/test_acl/__init__.py b/tests/python/contrib/test_acl/__init__.py new file mode 100644 index 0000000000000..a8671172febde --- /dev/null +++ b/tests/python/contrib/test_acl/__init__.py @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Infrastructure and tests for ACL""" diff --git a/tests/python/contrib/test_acl/infrastructure.py b/tests/python/contrib/test_acl/infrastructure.py new file mode 100644 index 0000000000000..04c5d2784c28a --- /dev/null +++ b/tests/python/contrib/test_acl/infrastructure.py @@ -0,0 +1,162 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from itertools import zip_longest, combinations +import json + +import tvm +from tvm import relay +from tvm import rpc +from tvm.contrib import graph_runtime +from tvm.relay.op.contrib import acl +from tvm.contrib import util + + +class Device: + """Adjust the following settings to connect to and use a remote device for tests.""" + use_remote = False + target = "llvm -target=aarch64-linux-gnu -mattr=+neon" + # Enable cross compilation when connecting a remote device from a non-arm platform. + cross_compile = None + # cross_compile = "aarch64-linux-gnu-g++" + + def __init__(self): + """Keep remote device for lifetime of object.""" + self.device = self._get_remote() + + @classmethod + def _get_remote(cls): + """Get a remote (or local) device to use for testing.""" + if cls.use_remote: + # Here you may adjust settings to run the ACL unit tests via a remote + # device using the RPC mechanism. 
Use this in the case you want to compile + # an ACL module on a different machine to what you run the module on i.e. + # x86 -> AArch64. + # + # Use the following to connect directly to a remote device: + # device = rpc.connect( + # hostname="0.0.0.0", + # port=9090) + # + # Or connect via a tracker: + # device = tvm.autotvm.measure.request_remote( + # host="0.0.0.0", + # port=9090, + # device_key="device_key", + # timeout=1000) + # + # return device + raise NotImplementedError( + "Please adjust these settings to connect to your remote device.") + else: + device = rpc.LocalSession() + return device + + +def skip_runtime_test(): + """Skip test if it requires the runtime and it's not present.""" + # ACL codegen not present. + if not tvm.get_global_func("relay.ext.acl", True): + print("Skip because ACL codegen is not available.") + return True + + # Remote device is in use or ACL runtime not present + if not Device.use_remote and not acl.is_acl_runtime_present(): + print("Skip because runtime isn't present or a remote device isn't being used.") + return True + + +def skip_codegen_test(): + """Skip test if it requires the ACL codegen and it's not present.""" + if not tvm.get_global_func("relay.ext.acl", True): + print("Skip because ACL codegen is not available.") + return True + + +def build_module(mod, target, params=None, enable_acl=True): + """Build module with option to build for ACL.""" + if isinstance(mod, tvm.relay.expr.Call): + mod = tvm.IRModule.from_expr(mod) + with tvm.transform.PassContext(opt_level=3): + if enable_acl: + mod = acl.partition_for_acl(mod, params) + return relay.build(mod, target=target, params=params) + + +def build_and_run(mod, inputs, outputs, params, device, enable_acl=True, no_runs=1): + """Build and run the relay module.""" + graph, lib, params = build_module(mod, device.target, params, enable_acl) + lib = update_lib(lib, device.device, device.cross_compile) + gen_module = graph_runtime.create(graph, lib, ctx=device.device.cpu(0)) + gen_module.set_input(**inputs) + gen_module.set_input(**params) + for _ in range(no_runs): + gen_module.run() + out = [gen_module.get_output(i) for i in range(outputs)] + return out + + +def update_lib(lib, device, cross_compile): + """Export the library to the remote/local device.""" + lib_name = "mod.so" + temp = util.tempdir() + lib_path = temp.relpath(lib_name) + if cross_compile: + lib.export_library(lib_path, cc=cross_compile) + else: + lib.export_library(lib_path) + device.upload(lib_path) + lib = device.load_module(lib_name) + return lib + + +def verify(answers, atol, rtol): + """Compare the array of answers. 
Each entry is a list of outputs.""" + if len(answers) < 2: + raise RuntimeError( + f"No results to compare: expected at least two, found {len(answers)}") + for answer in zip_longest(*answers): + for outs in combinations(answer, 2): + tvm.testing.assert_allclose( + outs[0].asnumpy(), outs[1].asnumpy(), rtol=rtol, atol=atol) + + +def extract_acl_modules(module): + """Get the ACL module(s) from llvm module.""" + return list(filter(lambda mod: mod.type_key == "acl", + module.imported_modules)) + + +def verify_codegen(module, known_good_codegen, num_acl_modules, + target="llvm -target=aarch64-linux-gnu -mattr=+neon"): + """Check acl codegen against a known good output.""" + _, module, _ = build_module(module, target) + acl_modules = extract_acl_modules(module) + + assert len(acl_modules) == num_acl_modules, \ + f"The number of ACL modules produced ({len(acl_modules)}) does not " \ + f"match the expected value ({num_acl_modules})." + + for mod in acl_modules: + source = mod.get_source() + source_json = json.loads(source) + func_name = list(source_json.keys())[0] + codegen = source_json[func_name]["node"] + + assert codegen == known_good_codegen, \ + f"The JSON produced by codegen does not match the expected result. \n" \ + f"Actual={json.dumps(codegen, sort_keys=True, indent=2)} \n" \ + f"Expected={json.dumps(known_good_codegen, sort_keys=True, indent=2)}" diff --git a/tests/python/contrib/test_acl/test_conv2d.py b/tests/python/contrib/test_acl/test_conv2d.py new file mode 100644 index 0000000000000..a2724315c4e8e --- /dev/null +++ b/tests/python/contrib/test_acl/test_conv2d.py @@ -0,0 +1,202 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
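The Device helper above defaults to a local RPC session. To run these tests against a real AArch64 board instead, the commented-out settings in _get_remote are meant to be filled in; a minimal sketch of such a configuration follows (an illustration only, not part of this patch: the board address, port and cross-compiler prefix are placeholders).

# Hypothetical adjustment of tests/python/contrib/test_acl/infrastructure.py
# for remote execution; address, port and toolchain prefix are examples only.
from tvm import rpc

class Device:
    use_remote = True
    target = "llvm -target=aarch64-linux-gnu -mattr=+neon"
    # Cross-compile the exported library when the host is not AArch64.
    cross_compile = "aarch64-linux-gnu-g++"

    def __init__(self):
        self.device = self._get_remote()

    @classmethod
    def _get_remote(cls):
        # Connect directly to an RPC server started on the board, e.g.
        # `python -m tvm.exec.rpc_server --host 0.0.0.0 --port 9090`.
        return rpc.connect("192.168.1.10", 9090)

With use_remote set, skip_runtime_test no longer requires the host build to include the ACL runtime, and update_lib uploads the cross-compiled mod.so to the board before loading it.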
+"""ACL Integration conv2d tests.""" + +import numpy as np + +import tvm +from tvm import relay + +from .infrastructure import skip_runtime_test, skip_codegen_test, build_and_run, \ + verify, verify_codegen +from .infrastructure import Device + + +def _get_model(shape, kernel_size, padding, strides, + dilation, groups, dtype, channels, + var_names, has_bias=False, has_activation=False, has_pad=False): + """Return a model and any parameters it may have""" + a = relay.var(next(var_names), shape=shape, dtype=dtype) + if has_pad: + p = ((0, 0), (padding[0], padding[0]), (padding[1], padding[1]), (0, 0)) + a = relay.nn.pad(a, pad_width=p) + padding = (0, 0, 0, 0) + else: + if len(padding) == 2: + padding = (padding[0], padding[1], padding[0], padding[1]) + shape = (shape[0], shape[1] + padding[0] * 2, + shape[2] + padding[1] * 2, shape[3]) + weight_shape = (kernel_size, kernel_size, shape[3] // groups, channels) + w = tvm.nd.array(np.random.uniform(-128, 127, weight_shape).astype(dtype)) + weights = relay.const(w, dtype) + out = relay.nn.conv2d( + a, + weights, + kernel_size=(kernel_size, kernel_size), + data_layout="NHWC", + kernel_layout="HWIO", + dilation=(1, 1), + strides=strides, + padding=padding, + groups=groups, + channels=channels + ) + params = {"w": w} + if has_bias: + b = tvm.nd.array(np.random.uniform(-128, 127, weight_shape[3]).astype(dtype)) + biasc = relay.const(b, dtype) + out = relay.nn.bias_add(out, biasc, axis=3) + params["b"] = b + if has_activation: + out = relay.nn.relu(out) + return out, params + + +def _get_expected_codegen(shape, kernel_size, padding, strides, + dilation, groups, dtype, channels, + has_bias=False, has_activation=False): + codegen = { + "name": "conv2d", + "inputs": [], + "outputs": [], + "attrs": { + "groups": ["Int", 1], + "num_inputs": ["Size_t", 2], + "num_outputs": ["Size_t", 1] + } + } + + if len(padding) == 2: + padding = (padding[0], padding[1], padding[0], padding[1]) + # Transpose padding to match ACL format + padding = (padding[1], padding[3], padding[0], padding[2]) + weight_shape = (channels, kernel_size, kernel_size, shape[3] // groups) + output_height = ((shape[1] - kernel_size + padding[2] + padding[3]) / strides[0]) + 1 + output_width = ((shape[2] - kernel_size + padding[0] + padding[1]) / strides[1]) + 1 + output_shape = (1, int(output_height), int(output_width), channels) + + codegen["attrs"]["padding"] = ["IntVector", list(padding)] + codegen["attrs"]["strides"] = ["IntVector", list(strides)] + if has_activation: + codegen["attrs"]["activation_type"] = ["String", "relu"] + + inputs = [{"type": "var", "shape": list(shape)}, + {"type": "const", "shape": list(weight_shape)}] + if has_bias: + inputs.append({"type": "const", "shape": [weight_shape[0]]}) + outputs = [{"type": "var", "shape": list(output_shape)}] + + codegen["inputs"] = inputs + codegen["outputs"] = outputs + codegen["attrs"]["num_inputs"] = ["Size_t", len(inputs)] + codegen["attrs"]["num_outputs"] = ["Size_t", len(outputs)] + + return codegen + + +def test_conv2d(): + if skip_runtime_test(): + return + + device = Device() + + shape = (1, 25, 25, 1) + dtype = "float32" + + inputs = { + "a": tvm.nd.array(np.random.uniform(-128, 127, shape).astype(dtype)), + } + + for kernel_size in [2, 3]: + outputs = [] + func, params = _get_model(shape, kernel_size, + (0, 0), (1, 1), 1, 1, + dtype, 1, iter(inputs)) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, + params, device, + enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + for pad_ksize in 
[((1, 1), 3), ((2, 2), 5), ((2, 1), 3)]: + outputs = [] + func, params = _get_model(shape, pad_ksize[1], pad_ksize[0], + (1, 1), 1, 1, dtype, 1, iter(inputs)) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, + params, device, + enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + for strides in [(1, 1), (2, 2)]: + outputs = [] + func, params = _get_model(shape, 2, (0, 0), strides, + 1, 1, dtype, 1, iter(inputs)) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, + params, device, + enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + # Test composite convolution: (has_pad, has_bias, has_activation). + for composite in [(False, True, False), (False, False, True), (False, True, True), + (True, False, False)]: + outputs = [] + func, params = _get_model(shape, 2, (1, 1), (1, 1), + 1, 1, dtype, 1, iter(inputs), + has_pad=composite[0], + has_bias=composite[1], + has_activation=composite[2]) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, + params, device, + enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + +def test_codegen_conv2d(): + if skip_codegen_test(): + return + + shape = (1, 25, 25, 1) + dtype = "float32" + inputs = {"a"} + + for pad_ksize in [((1, 1), 3), ((2, 1), 3)]: + args = (shape, pad_ksize[1], pad_ksize[0], (1, 1), 1, 1, dtype, 1) + func, params = _get_model(*args, var_names=iter(inputs)) + exp_codegen = _get_expected_codegen(*args) + verify_codegen(func, exp_codegen, 1) + + # Test composite convolution: (has_pad, has_bias, has_activation). + for composite in [(False, True, False), (False, False, True), (False, True, True), + (True, False, False)]: + args = (shape, 2, (1, 1), (1, 1), 1, 1, dtype, 1) + func, params = _get_model(*args, var_names=iter(inputs), + has_pad=composite[0], + has_bias=composite[1], + has_activation=composite[2]) + exp_codegen = _get_expected_codegen(*args, + has_bias=composite[1], + has_activation=composite[2], + ) + verify_codegen(func, exp_codegen, 1) + + +if __name__ == "__main__": + test_conv2d() + test_codegen_conv2d() diff --git a/tests/python/contrib/test_acl/test_network.py b/tests/python/contrib/test_acl/test_network.py new file mode 100644 index 0000000000000..e5afe905228f1 --- /dev/null +++ b/tests/python/contrib/test_acl/test_network.py @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
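To make the comparison performed by verify_codegen concrete, consider the first case exercised by test_codegen_conv2d above: shape (1, 25, 25, 1), kernel_size 3, padding (1, 1), strides (1, 1), groups 1, channels 1, no bias or activation. Tracing _get_expected_codegen by hand (an illustration derived from the helper's formulas, not captured test output) gives:

expected = {
    "name": "conv2d",
    "inputs": [
        {"type": "var", "shape": [1, 25, 25, 1]},    # NHWC input
        {"type": "const", "shape": [1, 3, 3, 1]},    # (channels, kH, kW, in_channels // groups)
    ],
    "outputs": [
        # ((25 - 3 + 1 + 1) / 1) + 1 = 25 for each spatial dimension
        {"type": "var", "shape": [1, 25, 25, 1]},
    ],
    "attrs": {
        "groups": ["Int", 1],
        "num_inputs": ["Size_t", 2],
        "num_outputs": ["Size_t", 1],
        "padding": ["IntVector", [1, 1, 1, 1]],      # 2-element padding expanded, then reordered for ACL
        "strides": ["IntVector", [1, 1]],
    },
}

verify_codegen parses the JSON returned by the module's get_source, picks out the node recorded for the single ACL function and asserts that it equals this dictionary.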
+"""ACL network tests.""" + +import numpy as np + +from tvm import relay + +from .infrastructure import skip_runtime_test, build_and_run, verify +from .infrastructure import Device + + +def _build_and_run_keras_network(mod, params, inputs, device): + """Helper function to build and run a network from the Keras frontend.""" + data = {} + for name, shape in inputs.items(): + data[name] = np.random.uniform(-128, 127, shape).astype("float32") + + outputs = [] + for acl in [False, True]: + outputs.append(build_and_run(mod, data, 1, params, + device, enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + +def test_vgg16(): + if skip_runtime_test(): + return + + device = Device() + + def get_model(): + from keras.applications import VGG16 + vgg16 = VGG16(include_top=True, weights='imagenet', + input_shape=(224, 224, 3), classes=1000) + inputs = {vgg16.input_names[0]: (1, 224, 224, 3)} + mod, params = relay.frontend.from_keras(vgg16, inputs, layout="NHWC") + return mod, params, inputs + + _build_and_run_keras_network(*get_model(), device=device) + + +def test_mobilenet(): + if skip_runtime_test(): + return + + device = Device() + + def get_model(): + from keras.applications import MobileNet + mobilenet = MobileNet(include_top=True, weights='imagenet', + input_shape=(224, 224, 3), classes=1000) + inputs = {mobilenet.input_names[0]: (1, 224, 224, 3)} + mod, params = relay.frontend.from_keras(mobilenet, inputs, layout="NHWC") + return mod, params, inputs + + _build_and_run_keras_network(*get_model(), device=device) + + +if __name__ == "__main__": + test_vgg16() + test_mobilenet() diff --git a/tests/python/contrib/test_acl/test_pooling.py b/tests/python/contrib/test_acl/test_pooling.py new file mode 100644 index 0000000000000..8fb1e93d6ac07 --- /dev/null +++ b/tests/python/contrib/test_acl/test_pooling.py @@ -0,0 +1,121 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""ACL Integration pooling tests.""" + +import numpy as np + +import tvm +from tvm import relay + +from .infrastructure import skip_runtime_test, skip_codegen_test, build_and_run, \ + verify, verify_codegen +from .infrastructure import Device + + +def _get_model(shape, typef, sizes, strides, padding, + ceil_mode, var_names): + """Return a model and any parameters it may have.""" + var = relay.var(next(var_names), shape=shape, dtype="float32") + pool = typef(var, pool_size=sizes, strides=strides, padding=padding, + ceil_mode=ceil_mode, layout="NHWC") + return pool + + +def _get_expected_codegen(shape, typef, sizes, strides, padding, + ceil_mode): + codegen = { + "name": "max_pool", + "inputs": [], + "outputs": [], + "attrs": { + "pooling_type": ["String", "max"] + } + } + + if len(padding) == 2: + padding = (padding[1], padding[1], padding[0], padding[0]) + # Transpose padding to match ACL format + padding = (padding[1], padding[3], padding[0], padding[2]) + output_height = ((shape[1] - sizes[0] + padding[2] + padding[3]) / strides[0]) + 1 + output_width = ((shape[2] - sizes[1] + padding[0] + padding[1]) / strides[1]) + 1 + output_shape = (1, int(output_height), int(output_width), shape[3]) + + if typef == relay.nn.max_pool2d: + pooling_type = "max" + else: + raise NotImplementedError(f"No conversion from {typef} to pooling_type string.") + + codegen["attrs"]["padding"] = ["IntVector", list(padding)] + codegen["attrs"]["strides"] = ["IntVector", list(strides)] + codegen["attrs"]["pool_size"] = ["IntVector", list(sizes)] + codegen["attrs"]["pooling_type"] = ["String", pooling_type] + + inputs = [{"type": "var", "shape": list(shape)}] + outputs = [{"type": "var", "shape": list(output_shape)}] + + codegen["inputs"] = inputs + codegen["outputs"] = outputs + codegen["attrs"]["num_inputs"] = ["Size_t", len(inputs)] + codegen["attrs"]["num_outputs"] = ["Size_t", len(outputs)] + + return codegen + + +def test_pooling(): + if skip_runtime_test(): + return + + device = Device() + + for size in [(2, 2), (3, 3)]: + for stride in [(2, 2)]: + shape = (1, size[0] + stride[0] * 5, + size[1] + stride[1] * 5, 16) + + inputs = { + "a": tvm.nd.array(np.random.uniform(-1, 1, shape).astype("float32")), + } + + outputs = [] + func = _get_model(shape, relay.nn.max_pool2d, size, + stride, (0, 0), True, iter(inputs)) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, None, device, + enable_acl=acl)) + verify(outputs, atol=0.001, rtol=0.001) + + +def test_codegen_pooling(): + if skip_codegen_test(): + return + + inputs = {"a"} + + for size in [(2, 2), (3, 3)]: + for stride in [(2, 2)]: + shape = (1, size[0] + stride[0] * 5, + size[1] + stride[1] * 5, 16) + args = (shape, relay.nn.max_pool2d, size, + stride, (0, 0), True) + func = _get_model(*args, iter(inputs)) + exp_codegen = _get_expected_codegen(*args) + verify_codegen(func, exp_codegen, 1) + + +if __name__ == "__main__": + test_pooling() + test_codegen_pooling() diff --git a/tests/python/contrib/test_acl/test_reshape.py b/tests/python/contrib/test_acl/test_reshape.py new file mode 100644 index 0000000000000..81192cdf992c8 --- /dev/null +++ b/tests/python/contrib/test_acl/test_reshape.py @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""ACL Integration reshape tests.""" + +import numpy as np + +import tvm +from tvm import relay + +from .infrastructure import skip_runtime_test, skip_codegen_test, build_and_run, \ + verify, verify_codegen +from .infrastructure import Device + + +def _get_model(input_shape, output_shape, var_names): + """Return a model and any parameters it may have.""" + a = relay.var(next(var_names), shape=input_shape, dtype="float32") + reshape = relay.reshape(a, output_shape) + return reshape + + +def _get_expected_codegen(input_shape, output_shape): + codegen = { + "name": "reshape", + "inputs": [], + "outputs": [], + "attrs": {} + } + + inputs = [{"type": "var", "shape": list(input_shape)}] + outputs = [{"type": "var", "shape": list(output_shape)}] + + codegen["inputs"] = inputs + codegen["outputs"] = outputs + codegen["attrs"]["num_inputs"] = ["Size_t", len(inputs)] + codegen["attrs"]["num_outputs"] = ["Size_t", len(outputs)] + + return codegen + + +def test_reshape(): + if skip_runtime_test(): + return + + device = Device() + + inputs = { + "a": tvm.nd.array( + np.random.uniform(-128, 127, (1, 1, 1, 1000)).astype("float32")) + } + + for shape in [(1, 1000), (10, 10, 10)]: + outputs = [] + func = _get_model(inputs["a"].shape, shape, iter(inputs)) + for acl in [False, True]: + outputs.append(build_and_run(func, inputs, 1, None, device, + enable_acl=acl)) + verify(outputs, atol=1e-7, rtol=1e-7) + + +def test_codegen_reshape(): + if skip_codegen_test(): + return + + shape = (1, 1, 1, 1000) + inputs = {"a"} + + for new_shape in [(1, 1000), (10, 10, 10)]: + args = (shape, new_shape) + func = _get_model(*args, iter(inputs)) + exp_codegen = _get_expected_codegen(*args) + verify_codegen(func, exp_codegen, 1) + + +if __name__ == "__main__": + test_reshape() + test_codegen_reshape() diff --git a/tests/python/contrib/test_acl/test_runtime.py b/tests/python/contrib/test_acl/test_runtime.py new file mode 100644 index 0000000000000..7b332730e9538 --- /dev/null +++ b/tests/python/contrib/test_acl/test_runtime.py @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""ACL runtime tests.""" + +import numpy as np + +import tvm +from tvm import relay + +from .infrastructure import skip_runtime_test, build_and_run, verify +from .infrastructure import Device + + +def test_multiple_ops(): + """ + Test multiple operators destined for acl. + ACL will expect these ops as in 2 separate functions. + """ + if skip_runtime_test(): + return + + device = Device() + + def get_model(input_shape, var_names): + """Return a model and any parameters it may have.""" + a = relay.var(next(var_names), shape=input_shape, dtype="float32") + out = relay.reshape(a, (1, 1, 1000)) + out = relay.reshape(out, (1, 1000)) + return out + + inputs = { + "a": tvm.nd.array(np.random.uniform(0, 1, (1, 1, 1, 1000)).astype("float32")) + } + + outputs = [] + for acl in [False, True]: + func = get_model(inputs["a"].shape, iter(inputs)) + outputs.append(build_and_run(func, inputs, 1, None, device, + enable_acl=acl)) + verify(outputs, atol=0.002, rtol=0.01) + + +def test_multiple_runs(): + """ + Test that multiple runs of an operator work. + Note: the result isn't checked. + """ + if skip_runtime_test(): + return + + device = Device() + + def get_model(): + a = relay.var("a", shape=(1, 28, 28, 512), dtype="float32") + w = tvm.nd.array(np.ones((256, 1, 1, 512), dtype="float32")) + weights = relay.const(w, "float32") + conv = relay.nn.conv2d( + a, + weights, + kernel_size=(1, 1), + data_layout="NHWC", + kernel_layout="OHWI", + strides=(1, 1), + padding=(0, 0), + dilation=(1, 1) + ) + params = {"w": w} + return conv, params + + inputs = { + "a": tvm.nd.array(np.ones((1, 28, 28, 512), dtype="float32")), + } + + func, params = get_model() + build_and_run(func, inputs, 1, + params, device, + enable_acl=True, + no_runs=3) + + +if __name__ == "__main__": + test_multiple_ops() + test_multiple_runs()