diff --git a/cmake/modules/contrib/ArmComputeLib.cmake b/cmake/modules/contrib/ArmComputeLib.cmake index ff9c8f7470131..0a75f607acf3f 100644 --- a/cmake/modules/contrib/ArmComputeLib.cmake +++ b/cmake/modules/contrib/ArmComputeLib.cmake @@ -36,6 +36,8 @@ if(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME) file(GLOB ACL_CONTRIB_SRC src/runtime/contrib/arm_compute_lib/*) + # Cmake needs to find arm_compute, include and support directories + # in the path specified by ACL_PATH. set(ACL_INCLUDE_DIRS ${ACL_PATH}/include ${ACL_PATH}) include_directories(${ACL_INCLUDE_DIRS}) diff --git a/docker/install/ubuntu_install_arm_compute_lib.sh b/docker/install/ubuntu_install_arm_compute_lib.sh index 73e92790fe24a..9616210261ccd 100644 --- a/docker/install/ubuntu_install_arm_compute_lib.sh +++ b/docker/install/ubuntu_install_arm_compute_lib.sh @@ -24,7 +24,7 @@ repo_url="https://github.com/ARM-software/ComputeLibrary.git" repo_dir="acl" install_path="/opt/$repo_dir" architecture_type=$(uname -i) -target_arch="arm64-v8a" # arm64-v8a/armv7a +target_arch="arm64-v8a" # arm64-v8a / arm64-v8.2-a / armv7a build_type="native" tmpdir=$(mktemp -d) @@ -41,9 +41,16 @@ apt-get install -y --no-install-recommends \ git \ scons \ bsdmainutils \ - build-essential \ + build-essential + +# Install cross-compiler when not building natively. +# Depending on the architecture selected to compile for, +# you may need to install an alternative cross-compiler. +if [ "$architecture_type" != "aarch64" ]; then + apt-get install -y --no-install-recommends \ g++-aarch64-linux-gnu \ gcc-aarch64-linux-gnu +fi cd "$tmpdir" diff --git a/docs/deploy/arm_compute_lib.rst b/docs/deploy/arm_compute_lib.rst index 6dc8df082a282..eaffc0a565d8e 100644 --- a/docs/deploy/arm_compute_lib.rst +++ b/docs/deploy/arm_compute_lib.rst @@ -15,8 +15,9 @@ specific language governing permissions and limitations under the License. 
-Relay Arm|reg| Compute Library Integration -========================================== +Relay Arm :sup:`®` Compute Library Integration +============================================== +**Author**: `Luke Hutton <https://github.com/lhutton1>`_ Introduction ------------ @@ -26,6 +27,35 @@ and GPU's. Currently the integration offloads operators to ACL to use hand-craft routines in the library. By offloading select operators from a relay graph to ACL we can achieve a performance boost on such devices. +Installing Arm Compute Library +------------------------------ + +Before installing Arm Compute Library, it is important to know what architecture to build for. One way +to determine this is to use `lscpu` and look for the "Model name" of the CPU. You can then use this to +determine the architecture by looking online. + +We recommend two different ways to build and install ACL: + +* Use the script located at `docker/install/ubuntu_install_arm_compute_lib.sh`. You can use this + script for building ACL from source natively or for cross-compiling the library on an x86 machine. + You may need to change the architecture of the device you wish to compile for by altering the + `target_arch` variable. Binaries will be built from source and installed to the location denoted by + `install_path`. +* Alternatively, you can download and use pre-built binaries from: + https://github.com/ARM-software/ComputeLibrary/releases. When using this package, you will need to + select the binaries for the architecture you require and make sure they are visible to CMake. This + can be done like so: + + .. code:: bash + + cd <acl-prebuilt-package>/lib + mv ./linux-<architecture-to-build-for>-neon/* . + + +In both cases you will need to set USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME to the path where the ACL package +is located. CMake will look in /path-to-acl/ along with /path-to-acl/lib and /path-to-acl/build for the +required binaries. See the section below for more information on how to use these configuration options. 
+ Building with ACL support ------------------------- @@ -42,6 +72,20 @@ to compile an ACL module on an x86 machine and then run the module on a remote A need to use USE_ARM_COMPUTE_LIB=ON on the x86 machine and USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON on the remote AArch64 device. +By default both options are set to OFF. Using USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME=ON will mean that ACL +binaries are searched for by cmake in the default locations +(see https://cmake.org/cmake/help/v3.4/command/find_library.html). In addition to this, +/path-to-tvm-project/acl/ will also be searched. It is likely that you will need to set your own path to +locate ACL. This can be done by specifying a path in the place of ON. + +These flags should be set in your config.cmake file. For example: + +.. code:: cmake + + set(USE_ARM_COMPUTE_LIB ON) + set(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME /path/to/acl) + + Usage ----- @@ -74,7 +118,7 @@ max_pool2d operator). Annotate and partition the graph for ACL. -..code:: python +.. code:: python from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib module = partition_for_arm_compute_lib(module) @@ -100,7 +144,7 @@ Export the module. Run Inference. This must be on an Arm device. If compiling on x86 device and running on AArch64, consider using the RPC mechanism. Tutorials for using the RPC mechanism: -https://tvm.apache.org/docs/tutorials/cross_compilation_and_rpc.html#sphx-glr-tutorials-cross-compilation-and-rpc-py +https://tvm.apache.org/docs/tutorials/get_started/cross_compilation_and_rpc.html .. code:: python @@ -155,12 +199,12 @@ what needs to be changed and where, it will not however dive into the complexiti individual operator. This is left to the developer. There are a series of files we need to make changes to: + * `python/relay/op/contrib/arm_compute_lib.py` In this file we define the operators we wish to offload using the -`op.register` decorator. 
This will mean the annotation pass recognizes this operator as ACL -offloadable. + `op.register` decorator. This will mean the annotation pass recognizes this operator as ACL offloadable. * `src/relay/backend/contrib/arm_compute_lib/codegen.cc` Implement `Create[OpName]JSONNode` method. This is where we -declare how the operator should be represented by JSON. This will be used to create the ACL module. -* `src/runtime/contrib/arm_compute_lib/acl_kernel.h` Implement `Create[OpName]Layer` method. This is where we -define how the JSON representation can be used to create an ACL function. We simply define how to -translate from the JSON representation to ACL API. + declare how the operator should be represented by JSON. This will be used to create the ACL module. +* `src/runtime/contrib/arm_compute_lib/acl_runtime.cc` Implement `Create[OpName]Layer` method. This is where we + define how the JSON representation can be used to create an ACL function. We simply define how to + translate from the JSON representation to ACL API. * `tests/python/contrib/test_arm_compute_lib` Add unit tests for the given operator.