From ae093b0d10c14c0d22fb61177846325d2e14cf58 Mon Sep 17 00:00:00 2001
From: Masahiro Hiramori
Date: Sat, 14 Sep 2024 21:36:37 +0900
Subject: [PATCH] remove nnpack integration from the codebase

---
 CMakeLists.txt                                |   2 -
 cmake/modules/LibInfo.cmake                   |   1 -
 cmake/modules/contrib/NNPack.cmake            |  38 ---
 docker/install/ubuntu_install_nnpack.sh       |  37 ---
 docs/reference/api/python/contrib.rst         |   7 -
 include/tvm/relay/attrs/nn.h                  |  19 --
 python/tvm/contrib/nnpack.py                  | 235 ----------------
 python/tvm/relay/op/nn/_nn.py                 |  16 --
 python/tvm/relay/op/nn/nn.py                  |  24 --
 python/tvm/relay/op/op_attrs.py               |   5 -
 python/tvm/relay/op/strategy/arm_cpu.py       |  35 ---
 python/tvm/relay/op/strategy/generic.py       |   8 -
 python/tvm/topi/arm_cpu/conv2d.py             | 172 ------------
 python/tvm/topi/arm_cpu/conv2d_alter_op.py    |  29 --
 python/tvm/topi/generic/nn.py                 |  17 --
 python/tvm/topi/nn/conv2d.py                  |  23 --
 rust/tvm-rt/Cargo.toml                        |   1 -
 rust/tvm-sys/Cargo.toml                       |   1 -
 rust/tvm-sys/build.rs                         |   3 -
 rust/tvm/Cargo.toml                           |   1 -
 src/relay/op/nn/convolution.cc                |  59 ----
 src/relay/transforms/to_mixed_precision.cc    |   2 -
 src/runtime/contrib/nnpack/convolution.cc     | 264 ------------------
 src/runtime/contrib/nnpack/fully_connected.cc |  63 -----
 src/runtime/contrib/nnpack/nnpack_utils.cc    |  62 ----
 src/runtime/contrib/nnpack/nnpack_utils.h     |  42 ---
 src/support/libinfo.cc                        |   5 -
 tests/python/contrib/test_nnpack.py           | 220 ---------------
 .../integration/test_winograd_nnpack.py       | 186 ------------
 29 files changed, 1577 deletions(-)
 delete mode 100644 cmake/modules/contrib/NNPack.cmake
 delete mode 100755 docker/install/ubuntu_install_nnpack.sh
 delete mode 100644 python/tvm/contrib/nnpack.py
 delete mode 100644 src/runtime/contrib/nnpack/convolution.cc
 delete mode 100644 src/runtime/contrib/nnpack/fully_connected.cc
 delete mode 100644 src/runtime/contrib/nnpack/nnpack_utils.cc
 delete mode 100644 src/runtime/contrib/nnpack/nnpack_utils.h
 delete mode 100644 tests/python/contrib/test_nnpack.py
 delete mode 100644 tests/python/integration/test_winograd_nnpack.py

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 38dd59b9c9064..be88814275e20 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -110,7 +110,6 @@ tvm_option(USE_MIOPEN "Build with ROCM:MIOpen" OFF)
 tvm_option(USE_ROCBLAS "Build with ROCM:RoCBLAS" OFF)
 tvm_option(USE_HIPBLAS "Build with ROCM:HIPBLAS" OFF)
 tvm_option(USE_SORT "Build with sort support" ON)
-tvm_option(USE_NNPACK "Build with nnpack support" OFF)
 tvm_option(USE_LIBTORCH "Build with libtorch support" OFF)
 tvm_option(USE_RANDOM "Build with random support" ON)
 tvm_option(USE_MICRO_STANDALONE_RUNTIME "Build with micro.standalone_runtime support" OFF)
@@ -591,7 +590,6 @@ include(cmake/modules/contrib/Posit.cmake)
 include(cmake/modules/contrib/MicroStandaloneRuntime.cmake)
 include(cmake/modules/contrib/MSCCLPP.cmake)
 include(cmake/modules/contrib/Sort.cmake)
-include(cmake/modules/contrib/NNPack.cmake)
 include(cmake/modules/contrib/LibTorch.cmake)
 include(cmake/modules/contrib/HybridDump.cmake)
 include(cmake/modules/contrib/TFLite.cmake)
diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake
index a2b51bb331953..f2949b715d558 100644
--- a/cmake/modules/LibInfo.cmake
+++ b/cmake/modules/LibInfo.cmake
@@ -103,7 +103,6 @@ function(add_lib_info src_file)
     TVM_INFO_USE_MKL="${USE_MKL}"
     TVM_INFO_USE_MRVL="${USE_MRVL}"
     TVM_INFO_USE_MSVC_MT="${USE_MSVC_MT}"
-    TVM_INFO_USE_NNPACK="${USE_NNPACK}"
     TVM_INFO_USE_OPENCL="${USE_OPENCL}"
     TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR="${USE_OPENCL_ENABLE_HOST_PTR}"
     TVM_INFO_USE_OPENCL_GTEST="${USE_OPENCL_GTEST}"
diff --git a/cmake/modules/contrib/NNPack.cmake b/cmake/modules/contrib/NNPack.cmake
deleted file mode 100644
index 86059b298f0b6..0000000000000
--- a/cmake/modules/contrib/NNPack.cmake
+++ /dev/null
@@ -1,38 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-if(USE_NNPACK)
-  if(NNPACK_PATH STREQUAL "")
-    set(NNPACK_PATH ${CMAKE_CURRENT_SOURCE_DIR}/NNPack)
-  endif()
-  set(PTHREAD_POOL_PATH ${NNPACK_PATH}/deps/pthreadpool)
-  tvm_file_glob(GLOB NNPACK_CONTRIB_SRC src/runtime/contrib/nnpack/*.cc)
-  list(APPEND RUNTIME_SRCS ${NNPACK_CONTRIB_SRC})
-  include_directories(${NNPACK_PATH}/include)
-  include_directories(${PTHREAD_POOL_PATH}/include)
-  find_library(NNPACK_CONTRIB_LIB nnpack ${NNPACK_PATH}/lib)
-  find_library(NNPACK_PTHREAD_CONTRIB_LIB pthreadpool ${NNPACK_PATH}/lib)
-  find_library(NNPACK_CPUINFO_CONTRIB_LIB cpuinfo ${NNPACK_PATH}/lib)
-  find_library(NNPACK_CLOG_CONTRIB_LIB clog ${NNPACK_PATH}/lib)
-
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${NNPACK_CONTRIB_LIB})
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${NNPACK_PTHREAD_CONTRIB_LIB})
-  list(APPEND TVM_RUNTIME_LINKER_LIBS ${NNPACK_CPUINFO_CONTRIB_LIB})
-  if(NNPACK_CLOG_CONTRIB_LIB)
-    list(APPEND TVM_RUNTIME_LINKER_LIBS ${NNPACK_CLOG_CONTRIB_LIB})
-  endif(NNPACK_CLOG_CONTRIB_LIB)
-endif(USE_NNPACK)
diff --git a/docker/install/ubuntu_install_nnpack.sh b/docker/install/ubuntu_install_nnpack.sh
deleted file mode 100755
index aac79a7436948..0000000000000
--- a/docker/install/ubuntu_install_nnpack.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-set -e
-set -u
-set -o pipefail
-
-apt-get update && apt-install-and-clear -y --no-install-recommends git cmake python-setuptools
-
-git clone https://github.com/Maratyszcza/NNPACK NNPACK
-git clone https://github.com/Maratyszcza/pthreadpool NNPACK/pthreadpool
-
-# Use specific versioning tag.
-(cd NNPACK && git checkout 70a77f485)
-(cd NNPACK/pthreadpool && git checkout 43edadc)
-
-mkdir -p NNPACK/build
-cd NNPACK/build
-cmake -DCMAKE_INSTALL_PREFIX:PATH=. -DNNPACK_INFERENCE_ONLY=OFF -DNNPACK_CONVOLUTION_ONLY=OFF -DNNPACK_BUILD_TESTS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DPTHREADPOOL_SOURCE_DIR=pthreadpool ..
-make -j2
-make install
-cd -
diff --git a/docs/reference/api/python/contrib.rst b/docs/reference/api/python/contrib.rst
index 26b5abb97ffa3..dda322417bc10 100644
--- a/docs/reference/api/python/contrib.rst
+++ b/docs/reference/api/python/contrib.rst
@@ -68,13 +68,6 @@ tvm.contrib.ndk
 .. automodule:: tvm.contrib.ndk
     :members:
 
-
-tvm.contrib.nnpack
-~~~~~~~~~~~~~~~~~~
-.. automodule:: tvm.contrib.nnpack
-    :members:
-
-
 tvm.contrib.nvcc
 ~~~~~~~~~~~~~~~~
 .. automodule:: tvm.contrib.nvcc
diff --git a/include/tvm/relay/attrs/nn.h b/include/tvm/relay/attrs/nn.h
index 58edb9df8b97a..2c434ec4a29da 100644
--- a/include/tvm/relay/attrs/nn.h
+++ b/include/tvm/relay/attrs/nn.h
@@ -282,25 +282,6 @@ struct Conv2DWinogradAttrs : public tvm::AttrsNode<Conv2DWinogradAttrs> {
   }
 };
 
-/*! \brief Attributes used in winograd weight transformation operators */
-struct Conv2DWinogradNNPACKWeightTransformAttrs
-    : public tvm::AttrsNode<Conv2DWinogradNNPACKWeightTransformAttrs> {
-  int convolution_algorithm;
-  DataType out_dtype;
-
-  TVM_DECLARE_ATTRS(Conv2DWinogradNNPACKWeightTransformAttrs,
-                    "relay.attrs.Conv2DWinogradNNPACKWeightTransformAttrs") {
-    TVM_ATTR_FIELD(convolution_algorithm)
-        .describe(
-            "The convolution algorithm for Winograd NNPACK. "
-            "E.g. tvm.contrib.nnpack.ConvolutionAlgorithm.WT_8x8 for WT_8x8, "
-            "tvm.contrib.nnpack.ConvolutionAlgorithm.WT_8x8_FP16 for WT_8x8_FP16");
-    TVM_ATTR_FIELD(out_dtype)
-        .set_default(NullValue<DataType>())
-        .describe("Output data type, set to explicit type under mixed precision setting");
-  }
-};
-
 /*! \brief Attributes used in convolution operators */
 struct Conv3DAttrs : public tvm::AttrsNode<Conv3DAttrs> {
   Array<IndexExpr> strides;
diff --git a/python/tvm/contrib/nnpack.py b/python/tvm/contrib/nnpack.py
deleted file mode 100644
index 010bef533c002..0000000000000
--- a/python/tvm/contrib/nnpack.py
+++ /dev/null
@@ -1,235 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""External function interface to NNPACK libraries."""
-import tvm
-from tvm import te
-import tvm._ffi
-
-
-def is_available():
-    """Check whether NNPACK is available, that is, `nnp_initialize()`
-    returns `nnp_status_success`.
-    """
-    return _initialize() == 0
-
-
-def fully_connected_inference(lhs, rhs, nthreads=1):
-    """Create an extern op that compute fully connected of 1D tensor lhs and
-    2D tensor rhs with nnpack.
-
-    Parameters
-    ----------
-    lhs : Tensor
-        lhs 1D array input[input_channels] of FP32 elements
-    rhs : Tensor
-        lhs 2D matrix kernel[output_channels][input_channels] of FP32 elements
-
-    Returns
-    -------
-    C : Tensor
-        lhs 1D array out[output_channels] of FP32 elements.
- """ - m = rhs.shape[0] - return te.extern( - (m,), - [lhs, rhs], - lambda ins, outs: tvm.tir.call_packed( - "tvm.contrib.nnpack.fully_connected_inference", ins[0], ins[1], outs[0], nthreads - ), - name="C", - ) - - -class ConvolutionAlgorithm: - AUTO = 0 - FFT_8x8 = 1 - FFT_16x16 = 2 - WT_8x8 = 3 - IMPLICIT_GEMM = 4 - DIRECT = 5 - WT_8x8_FP16 = 6 - - -class ConvolutionTransformStrategy: - COMPUTE = 1 - PRECOMPUTE = 2 - - -def convolution_inference( - data, kernel, bias, padding, stride, nthreads=1, algorithm=ConvolutionAlgorithm.AUTO -): - """Create an extern op to do inference convolution of 4D tensor data and - 4D tensor kernel and 1D tensor bias with nnpack. - - Parameters - ---------- - data : Tensor - data 4D tensor input[batch][input_channels][input_height][input_width] of - FP32 elements. - kernel : Tensor - kernel 4D tensor kernel[output_channels][input_channels][kernel_height] - [kernel_width] of FP32 elements. - bias : Tensor - bias 1D array bias[output_channels][input_channels][kernel_height] - [kernel_width] of FP32 elements. - padding : list - padding A 4-dim list of [pad_top, pad_bottom, pad_left, pad_right], - which indicates the padding around the feature map. - stride : list - stride A 2-dim list of [stride_height, stride_width], which indicates - the stride. - - Returns - ------- - output : Tensor - output 4D tensor output[batch][output_channels][output_height][output_width] - of FP32 elements. - """ - - assert isinstance(padding, list) and len(padding) == 4 - assert isinstance(stride, list) and len(stride) == 2 - batch, _, input_height, input_width = data.shape - output_channels, _, kernel_height, kernel_width = kernel.shape - idxdiv = te.indexdiv - output_height = idxdiv(input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1 - output_width = idxdiv(input_width + padding[0] + padding[1] - kernel_width, stride[1]) + 1 - - return te.extern( - (batch, output_channels, output_height, output_width), - [data, kernel, bias] if bias is not None else [data, kernel], - lambda ins, outs: tvm.tir.call_packed( - "tvm.contrib.nnpack.convolution_inference", - ins[0], - ins[1], - ins[2] if bias is not None else 0, - outs[0], - padding[0], - padding[1], - padding[2], - padding[3], - stride[0], - stride[1], - nthreads, - algorithm, - ), - name="C", - ) - - -def convolution_inference_without_weight_transform( - data, transformed_kernel, bias, padding, stride, nthreads=1, algorithm=ConvolutionAlgorithm.AUTO -): - """Create an extern op to do inference convolution of 4D tensor data and - 4D pre-transformed tensor kernel and 1D tensor bias with nnpack. - - Parameters - ---------- - data : Tensor - data 4D tensor input[batch][input_channels][input_height][input_width] of - FP32 elements. - transformed_kernel : Tensor - transformed_kernel 4D tensor kernel[output_channels][input_channels][tile] - [tile] of FP32 elements. - bias : Tensor - bias 1D array bias[output_channels][input_channels][kernel_height] - [kernel_width] of FP32 elements. - padding : list - padding A 4-dim list of [pad_top, pad_bottom, pad_left, pad_right], - which indicates the padding around the feature map. - stride : list - stride A 2-dim list of [stride_height, stride_width], which indicates - the stride. - - Returns - ------- - output : Tensor - output 4D tensor output[batch][output_channels][output_height][output_width] - of FP32 elements. 
- """ - - assert algorithm in (ConvolutionAlgorithm.WT_8x8, ConvolutionAlgorithm.WT_8x8_FP16) - assert isinstance(padding, list) and len(padding) == 4 - assert isinstance(stride, list) and len(stride) == 2 - batch, _, input_height, input_width = data.shape - output_channels, _, _, _ = transformed_kernel.shape - kernel_height, kernel_width = (3, 3) - idxdiv = te.indexdiv - output_height = idxdiv(input_height + padding[0] + padding[1] - kernel_height, stride[0]) + 1 - output_width = idxdiv(input_width + padding[0] + padding[1] - kernel_width, stride[1]) + 1 - - return te.extern( - (batch, output_channels, output_height, output_width), - [data, transformed_kernel, bias] if bias is not None else [data, transformed_kernel], - lambda ins, outs: tvm.tir.call_packed( - "tvm.contrib.nnpack.convolution_inference_without_weight_transform", - ins[0], - ins[1], - ins[2] if bias is not None else 0, - outs[0], - padding[0], - padding[1], - padding[2], - padding[3], - stride[0], - stride[1], - nthreads, - algorithm, - ), - name="C", - dtype="float32", - ) - - -def convolution_inference_weight_transform( - kernel, nthreads=1, algorithm=ConvolutionAlgorithm.AUTO, dtype="float32" -): - """Create an extern op to do inference convolution of 3D tensor data and - 4D tensor kernel and 1D tensor bias with nnpack. - - Parameters - ---------- - kernel : Tensor - kernel 4D tensor kernel[output_channels][input_channels][kernel_height] - [kernel_width] of FP32 elements. - - Returns - ------- - output : Tensor - output 4D tensor output[output_channels][input_channels][tile][tile] - of FP32 elements. - """ - assert algorithm in (ConvolutionAlgorithm.WT_8x8, ConvolutionAlgorithm.WT_8x8_FP16) - output_channels, input_channels, _, _ = kernel.shape - transform_tile_size = 8 - if not isinstance(dtype, str): - dtype = dtype.dtype - return te.extern( - (output_channels, input_channels, transform_tile_size, transform_tile_size), - [kernel], - lambda ins, outs: tvm.tir.call_packed( - "tvm.contrib.nnpack.convolution_inference_weight_transform", - ins[0], - outs[0], - nthreads, - algorithm, - ), - name="transform_kernel", - dtype=dtype, - ) - - -tvm._ffi._init_api("tvm.contrib.nnpack") diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py index a03907f071fdd..02b08c465e20c 100644 --- a/python/tvm/relay/op/nn/_nn.py +++ b/python/tvm/relay/op/nn/_nn.py @@ -820,22 +820,6 @@ def compute_contrib_conv2d_winograd_weight_transform(attrs, inputs, out_dtype): ) -@reg.register_compute("nn.contrib_conv2d_winograd_nnpack_weight_transform") -def compute_contrib_conv2d_winograd_nnpack_weight_transform(attrs, inputs, out_dtype): - """Compute definition of contrib_conv2d_winograd_nnpack_weight_transform""" - convolution_algorithm = attrs.get_int("convolution_algorithm") - out = topi.nn.conv2d_winograd_nnpack_weight_transform( - inputs[0], convolution_algorithm, out_dtype - ) - return [out] - - -reg.register_schedule( - "nn.contrib_conv2d_winograd_nnpack_weight_transform", - strategy.schedule_conv2d_winograd_nnpack_weight_transform, -) - - # conv2d_NCHWc reg.register_strategy("nn.contrib_conv2d_NCHWc", strategy.conv2d_NCHWc_strategy) diff --git a/python/tvm/relay/op/nn/nn.py b/python/tvm/relay/op/nn/nn.py index 8cb66ecaa9a24..8233d1ca5c4b4 100644 --- a/python/tvm/relay/op/nn/nn.py +++ b/python/tvm/relay/op/nn/nn.py @@ -2786,30 +2786,6 @@ def contrib_conv3d_winograd_weight_transform(weight, tile_size): return _make.contrib_conv3d_winograd_weight_transform(weight, tile_size) -def 
-def contrib_conv2d_winograd_nnpack_weight_transform(weight, convolution_algorithm, out_dtype=""):
-    r"""Weight Transformation part for 2D convolution with winograd algorithm.
-
-    We separate this as a single op to enable pre-compute for inference.
-    Use this together with nn.contrib_conv2d_winograd_without_weight_transform
-
-    Parameters
-    ----------
-    weight : tvm.relay.Expr
-        The weight expressions.
-
-    convolution_algorithm : int
-        The Tile size of winograd. E.g. 2 for F(2x2, 3x3) and 4 for F(4x4, 3x3)
-
-    Returns
-    -------
-    result : tvm.relay.Expr
-        The computed result.
-    """
-    return _make.contrib_conv2d_winograd_nnpack_weight_transform(
-        weight, convolution_algorithm, out_dtype
-    )
-
-
 def deformable_conv2d(
     data,
     offset,
diff --git a/python/tvm/relay/op/op_attrs.py b/python/tvm/relay/op/op_attrs.py
index deae9e2f48bee..b4943141404c9 100644
--- a/python/tvm/relay/op/op_attrs.py
+++ b/python/tvm/relay/op/op_attrs.py
@@ -49,11 +49,6 @@ class ConvWinogradWeightTransformAttrs(Attrs):
     """Attributes for nn.contrib_convNd_winograd_weight_transform"""
 
 
-@tvm._ffi.register_object("relay.attrs.Conv2DWinogradNNPACKWeightTransformAttrs")
-class Conv2DWinogradNNPACKWeightTransformAttrs(Attrs):
-    """Attributes for nn.contrib_conv2d_winograd_nnpack_weight_transform"""
-
-
 @tvm._ffi.register_object("relay.attrs.GlobalPool2DAttrs")
 class GlobalPool2DAttrs(Attrs):
     """Attributes for nn.global_pool"""
diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py
index bd9a0a4d020bb..5877b07aa8978 100644
--- a/python/tvm/relay/op/strategy/arm_cpu.py
+++ b/python/tvm/relay/op/strategy/arm_cpu.py
@@ -151,7 +151,6 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
 
                 # check if winograd algorithm is applicable
                 _, _, kh, kw = get_const_tuple(kernel.shape)
-                pt, pl, pb, pr = topi.nn.get_pad_tuple(padding, (kh, kw))
                 is_winograd_applicable = (
                     "float" in data.dtype
                     and "custom" not in data.dtype
@@ -171,13 +170,6 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target):
                         name="conv2d_nchw_winograd.arm_cpu",
                         plevel=5,
                     )
-                    if "nnpack" in target.libs and pt == 1 and pb == 1 and pl == 1 and pr == 1:
-                        strategy.add_implementation(
-                            wrap_compute_conv2d(topi.arm_cpu.conv2d_nchw_winograd_nnpack),
-                            wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nchw_winograd_nnpack),
-                            name="conv2d_nchw_winograd_nnpack.arm_cpu",
-                            plevel=15,
-                        )
             elif re.match(r"OIHW\d*o", kernel_layout):
                 strategy.add_implementation(
                     wrap_compute_conv2d(topi.arm_cpu.conv2d_nchw_spatial_pack),
@@ -460,20 +452,6 @@ def depthwise_conv2d_NCHWc_strategy_arm_cpu(attrs, inputs, out_type, target):
     return strategy
 
 
-def wrap_compute_conv2d_winograd_nnpack(topi_compute):
-    """wrap topi compute for conv2d_winograd NNPack"""
-
-    def _compute_conv2d_nnpack(attrs, inputs, out_type):
-        padding = attrs.get_int_tuple("padding")
-        strides = attrs.get_int_tuple("strides")
-        dilation = attrs.get_int_tuple("dilation")
-        out_dtype = attrs.get_str("out_dtype")
-        out_dtype = inputs[0].dtype if out_dtype in ("same", "") else out_dtype
-        return [topi_compute(inputs[0], inputs[1], None, strides, padding, dilation, out_dtype)]
-
-    return _compute_conv2d_nnpack
-
-
 @conv2d_winograd_without_weight_transform_strategy.register("arm_cpu")
 def conv2d_winograd_without_weight_transform_strategy_arm_cpu(attrs, inputs, out_type, target):
     """conv2d_winograd_without_weight_transform arm cpu strategy"""
@@ -498,19 +476,6 @@ def conv2d_winograd_without_weight_transform_strategy_arm_cpu(attrs, inputs, out
                     wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_nchw_winograd),
                     name="conv2d_nchw_winograd.arm_cpu",
                 )
-            elif len(kernel.shape) == 4:
-                # kernel must be packed by winograd nnpack
-                assert "nnpack" in target.libs
-                strategy.add_implementation(
-                    wrap_compute_conv2d_winograd_nnpack(
-                        topi.arm_cpu.conv2d_nchw_winograd_nnpack_without_weight_transform
-                    ),
-                    wrap_topi_schedule(
-                        topi.arm_cpu.schedule_conv2d_nchw_winograd_nnpack_without_weight_transform
-                    ),
-                    name="conv2d_nchw_winograd_nnpack_withou_weight_transform.arm_cpu",
-                    plevel=15,
-                )
             else:
                 raise RuntimeError(f"Unsupported kernel shape: {kernel.shape}")
     else:
diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py
index c2a4b4c302af6..19e35e510707a 100644
--- a/python/tvm/relay/op/strategy/generic.py
+++ b/python/tvm/relay/op/strategy/generic.py
@@ -401,14 +401,6 @@ def schedule_conv2d_winograd_weight_transform(attrs, outs, target):
         return topi.generic.schedule_conv2d_winograd_weight_transform(outs)
 
 
-# conv2d_winograd_nnpack_weight_transform
-@generic_func
-def schedule_conv2d_winograd_nnpack_weight_transform(attrs, outs, target):
-    """Schedule conv2d_winograd_nnpack_weight_transform"""
-    with target:
-        return topi.generic.schedule_conv2d_winograd_nnpack_weight_transform(outs)
-
-
 # conv2d_gemm_weight_transform
 @generic_func
 def schedule_conv2d_gemm_weight_transform(attrs, outs, target):
diff --git a/python/tvm/topi/arm_cpu/conv2d.py b/python/tvm/topi/arm_cpu/conv2d.py
index b7327d5b52e8c..88713bbd3d3db 100644
--- a/python/tvm/topi/arm_cpu/conv2d.py
+++ b/python/tvm/topi/arm_cpu/conv2d.py
@@ -22,7 +22,6 @@ from tvm import te
 from tvm import autotvm
 from tvm.script import tir as T
-import tvm.contrib.nnpack
 from tvm.tir.schedule.analysis import has_block
 
 from ..utils import traverse_inline, get_const_tuple
@@ -336,177 +335,6 @@ def _schedule_winograd(cfg, s, output, last):
     s[output].compute_inline()
 
 
-@autotvm.register_topi_compute("conv2d_nchw_winograd_nnpack.arm_cpu")
-def conv2d_nchw_winograd_nnpack(cfg, data, kernel, strides, padding, dilation, out_dtype):
-    """Compute conv2d_nchw using nnpack Winograd implementation"""
-    dtype = data.dtype
-    if dtype == "float32":
-        return _conv2d_arm_cpu_winograd_nnpack(
-            cfg,
-            data,
-            kernel,
-            strides,
-            padding,
-            dilation,
-            out_dtype,
-            tvm.contrib.nnpack.ConvolutionAlgorithm.WT_8x8,
-        )
-    elif dtype == "float16":
-        return _conv2d_arm_cpu_winograd_nnpack(
-            cfg,
-            data,
-            kernel,
-            strides,
-            padding,
-            dilation,
-            out_dtype,
-            tvm.contrib.nnpack.ConvolutionAlgorithm.WT_8x8_FP16,
-        )
-    else:
-        raise ValueError(f"Unsupported data type {dtype} for conv2d winograd nnpack")
-
-
-@autotvm.register_topi_schedule("conv2d_nchw_winograd_nnpack.arm_cpu")
-def schedule_conv2d_nchw_winograd_nnpack(cfg, outs):
-    """Create schedule for conv2d_nchw_winograd_nnpack"""
-    s = te.create_schedule([x.op for x in outs])
-
-    def _callback(op):
-        if "winograd_nnpack_conv2d_output" in op.tag:
-            output = op.output(0)
-            _schedule_winograd_nnpack(cfg, s, output, outs[0])
-
-    traverse_inline(s, outs[0].op, _callback)
-    return s
-
-
-def _conv2d_arm_cpu_winograd_nnpack(
-    cfg, data, kernel, strides, padding, dilation, out_dtype, convolution_algorithm
-):
-    """TOPI compute callback. Use winograd NNPACK template"""
-    N, CI, IH, IW = get_const_tuple(data.shape)
-
-    if isinstance(dilation, int):
-        dilation_h = dilation_w = dilation
-    else:
-        dilation_h, dilation_w = dilation
-    assert (dilation_h, dilation_w) == (1, 1)
-    assert len(kernel.shape) == 4
-    CO, _, KH, KW = get_const_tuple(kernel.shape)
-    HSTR, WSTR = strides if isinstance(strides, (tuple, list)) else (strides, strides)
-    pt, pl, pb, pr = get_pad_tuple(padding, (KH, KW))
-
-    assert (
-        KH == 3
-        and KW == 3
-        and pt == 1
-        and pb == 1
-        and pl == 1
-        and pr == 1
-        and HSTR == 1
-        and WSTR == 1
-    )
-    H = (IH + pt + pb - 3) // HSTR + 1
-    W = (IW + pl + pr - 3) // WSTR + 1
-
-    cfg.define_knob("winograd_nnpack_algorithm", [convolution_algorithm])
-
-    assert N == 1
-    with tvm.te.tag_scope("winograd_nnpack_conv2d_weight_transform"):
-        transformed_kernel = tvm.contrib.nnpack.convolution_inference_weight_transform(
-            kernel, algorithm=cfg["winograd_nnpack_algorithm"].val
-        )
-        if autotvm.GLOBAL_SCOPE.in_tuning:
-            transformed_kernel = te.compute(transformed_kernel.shape, lambda *args: 0.0)
-
-    with tvm.te.tag_scope("winograd_nnpack_conv2d_output"):
-        output = tvm.contrib.nnpack.convolution_inference_without_weight_transform(
-            data,
-            transformed_kernel,
-            bias=None,
-            padding=[pt, pb, pl, pr],
-            stride=[HSTR, WSTR],
-            algorithm=cfg["winograd_nnpack_algorithm"].val,
-        )
-
-    # we have to manually assign effective GFLOP for winograd
-    cfg.add_flop(2 * N * CI * H * W * KH * KW * CO)
-    return output
-
-
-def _schedule_winograd_nnpack(cfg, s, output, last):
-    # Could have bias.
-
-    (X, TK) = output.op.input_tensors[:2]
-
-    # transform kernel
-    assert isinstance(TK.op, (te.tensor.ComputeOp, te.tensor.ExternOp, te.tensor.PlaceholderOp))
-    if autotvm.GLOBAL_SCOPE.in_tuning and isinstance(TK.op, te.tensor.ComputeOp):
-        # kernel transformation will be pre-computed during compilation, so we skip
-        # this part to make tuning records correct
-        s[TK].pragma(s[TK].op.axis[0], "debug_skip_region")
-
-
-@autotvm.register_topi_compute("conv2d_nchw_winograd_nnpack_without_weight_transform.arm_cpu")
-def conv2d_nchw_winograd_nnpack_without_weight_transform(
-    cfg, data, transformed_kernel, bias, strides, padding, dilation, out_dtype
-):
-    """Compute conv2d_nchw using NNPack winograd without weight transform"""
-    N, CI, IH, IW = get_const_tuple(data.shape)
-    if isinstance(dilation, int):
-        dilation_h = dilation_w = dilation
-    else:
-        dilation_h, dilation_w = dilation
-    assert (dilation_h, dilation_w) == (1, 1)
-    assert len(transformed_kernel.shape) == 4
-    CO, _, _, _ = get_const_tuple(transformed_kernel.shape)
-    HSTR, WSTR = strides if isinstance(strides, (tuple, list)) else (strides, strides)
-    KH, KW = 3, 3
-    pt, pl, pb, pr = get_pad_tuple(padding, (KH, KW))
-
-    assert (
-        KH == 3
-        and KW == 3
-        and pt == 1
-        and pb == 1
-        and pl == 1
-        and pr == 1
-        and HSTR == 1
-        and WSTR == 1
-    )
-    H = (IH + pt + pb - 3) // HSTR + 1
-    W = (IW + pl + pr - 3) // WSTR + 1
-
-    assert N == 1
-    with tvm.te.tag_scope("winograd_nnpack_conv2d_output"):
-        output = tvm.contrib.nnpack.convolution_inference_without_weight_transform(
-            data=data,
-            transformed_kernel=transformed_kernel,
-            bias=bias,
-            padding=[pt, pb, pl, pr],
-            stride=[HSTR, WSTR],
-            algorithm=cfg["winograd_nnpack_algorithm"].val,
-        )
-
-    # we have to manually assign effective GFLOP for winograd
-    cfg.add_flop(2 * N * CI * H * W * KH * KW * CO)
-    return output
-
-
-@autotvm.register_topi_schedule("conv2d_nchw_winograd_nnpack_without_weight_transform.arm_cpu")
-def schedule_conv2d_nchw_winograd_nnpack_without_weight_transform(cfg, outs):
-    """TOPI schedule callback"""
-    s = te.create_schedule([x.op for x in outs])
-
-    def _callback(op):
-        if "winograd_nnpack_conv2d_output" in op.tag:
-            output = op.output(0)
-            _schedule_winograd_nnpack(cfg, s, output, outs[0])
-
-    traverse_inline(s, outs[0].op, _callback)
-    return s
-
-
 @autotvm.register_topi_compute("conv2d_nhwc_dsp.arm_cpu")
 def conv2d_nhwc_dsp(cfg, data, kernel, strides, padding, dilation, out_dtype):
     """Compute conv2d_nhwc with v7e-m DSP instructions."""
diff --git a/python/tvm/topi/arm_cpu/conv2d_alter_op.py b/python/tvm/topi/arm_cpu/conv2d_alter_op.py
index 2476cb92b9152..c941d3ad85810 100644
--- a/python/tvm/topi/arm_cpu/conv2d_alter_op.py
+++ b/python/tvm/topi/arm_cpu/conv2d_alter_op.py
@@ -273,35 +273,6 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type):
             inputs[0], weight_expr, **new_attrs
         )
 
-    if topi_tmpl == "conv2d_nchw_winograd_nnpack.arm_cpu":
-        assert data_layout == "NCHW" and kernel_layout == "OIHW"
-        N, CI, H, W = get_const_tuple(data.shape)
-        CO, _, KH, KW = get_const_tuple(kernel.shape)
-        new_attrs["channels"] = CO
-
-        # pre-compute winograd_nnpack transform
-        # for winograd_nnpack_fp16, the precompute prune pass must run on device,
-        # where float16 is supported
-        weight_dtype = "float32"
-        weight_expr = inputs[1]
-        transformed_weight = relay.nn.contrib_conv2d_winograd_nnpack_weight_transform(
-            weight_expr,
-            convolution_algorithm=cfg["winograd_nnpack_algorithm"].val,
-            out_dtype=weight_dtype,
-        )
-
-        new_data = data
-        new_kernel = te.placeholder((CO, CI, 8, 8), "float32")
-
-        new_workload = autotvm.task.args_to_workload(
-            [new_data, new_kernel, None, strides, padding, dilation, out_dtype],
-            "conv2d_nchw_winograd_nnpack_without_weight_transform.arm_cpu",
-        )
-        dispatch_ctx.update(target, new_workload, cfg)
-        return relay.nn.contrib_conv2d_winograd_without_weight_transform(
-            inputs[0], transformed_weight, **new_attrs
-        )
-
     if topi_tmpl == "depthwise_conv2d_nchw_spatial_pack.arm_cpu":
         assert data_layout == "NCHW" and kernel_layout == "OIHW"
         N, CI, H, W = get_const_tuple(data.shape)
diff --git a/python/tvm/topi/generic/nn.py b/python/tvm/topi/generic/nn.py
index a3da7a395151b..f007ccf78b96b 100644
--- a/python/tvm/topi/generic/nn.py
+++ b/python/tvm/topi/generic/nn.py
@@ -294,23 +294,6 @@ def schedule_conv2d_winograd_without_weight_transform(outs):
     return _default_schedule(outs, False)
 
 
-def schedule_conv2d_winograd_nnpack_weight_transform(outs):
-    """Schedule for weight transformation of winograd
-    Parameters
-    ----------
-    outs: Array of Tensor
-        The computation graph description of this operator
-        in the format of an array of tensors.
-    Returns
-    -------
-    sch: Schedule
-        The computation schedule for the op.
-    """
-    # Typically this is computed in PreCompute pass
-    s = te.create_schedule([x.op for x in outs])
-    return s
-
-
 def schedule_conv3d_ncdhw(outs):
     """Schedule for conv3d_ncdhw
 
diff --git a/python/tvm/topi/nn/conv2d.py b/python/tvm/topi/nn/conv2d.py
index 205730ff22d6a..1b206cba94fee 100644
--- a/python/tvm/topi/nn/conv2d.py
+++ b/python/tvm/topi/nn/conv2d.py
@@ -712,29 +712,6 @@ def conv2d_winograd_weight_transform(kernel, tile_size):
     )
 
 
-def conv2d_winograd_nnpack_weight_transform(kernel, convolution_algorithm, out_dtype):
-    """Weight transformation for winograd
-
-    Parameters
-    ----------
-    kernel: Tensor
-        The raw kernel tensor with layout "NCHW". Only 3x3 kernel is supported for now.
-    convolution_algorithm: int
-        The convolution algorithm for Winograd NNPACK.
-
-    Returns
-    -------
-    output : tvm.te.Tensor
-        4-D with shape [alpha, alpha, CO, CI]
-    """
-    # pylint: disable=import-outside-toplevel
-    from tvm.contrib import nnpack
-
-    return nnpack.convolution_inference_weight_transform(
-        kernel, algorithm=convolution_algorithm, dtype=out_dtype
-    )
-
-
 def group_conv2d_nchw(Input, Filter, stride, padding, dilation, groups, out_dtype=None):
     """Group convolution operator in NCHW layout.
 
diff --git a/rust/tvm-rt/Cargo.toml b/rust/tvm-rt/Cargo.toml
index 24d9061a213fc..34c43a3c468cf 100644
--- a/rust/tvm-rt/Cargo.toml
+++ b/rust/tvm-rt/Cargo.toml
@@ -69,7 +69,6 @@ use-thrust = ["tvm-sys/use-thrust"]
 use-miopen = ["tvm-sys/use-miopen"]
 use-rocblas = ["tvm-sys/use-rocblas"]
 use-sort = ["tvm-sys/use-sort"]
-use-nnpack = ["tvm-sys/use-nnpack"]
 use-random = ["tvm-sys/use-random"]
 use-micro-standalone-runtime = ["tvm-sys/use-micro-standalone-runtime"]
 use-cpp-rpc = ["tvm-sys/use-cpp-rpc"]
diff --git a/rust/tvm-sys/Cargo.toml b/rust/tvm-sys/Cargo.toml
index 4494e20afa31a..614ed269579c5 100644
--- a/rust/tvm-sys/Cargo.toml
+++ b/rust/tvm-sys/Cargo.toml
@@ -62,7 +62,6 @@ use-thrust = []
 use-miopen = []
 use-rocblas = []
 use-sort = []
-use-nnpack = []
 use-random = []
 use-micro-standalone-runtime = []
 use-cpp-rpc = []
diff --git a/rust/tvm-sys/build.rs b/rust/tvm-sys/build.rs
index 80c7efbaf8941..be0702ee101b2 100644
--- a/rust/tvm-sys/build.rs
+++ b/rust/tvm-sys/build.rs
@@ -162,9 +162,6 @@ fn find_using_tvm_build() -> Result {
     if cfg!(feature = "use-sort") {
         build_config.settings.use_sort = Some(true);
    }
-    if cfg!(feature = "use-nnpack") {
-        build_config.settings.use_nnpack = Some(true);
-    }
     if cfg!(feature = "use-random") {
         build_config.settings.use_random = Some(true);
     }
diff --git a/rust/tvm/Cargo.toml b/rust/tvm/Cargo.toml
index 8d9b23f7616b4..fbf76ed6c967f 100644
--- a/rust/tvm/Cargo.toml
+++ b/rust/tvm/Cargo.toml
@@ -68,7 +68,6 @@ use-thrust = ["tvm-rt/use-thrust"]
 use-miopen = ["tvm-rt/use-miopen"]
 use-rocblas = ["tvm-rt/use-rocblas"]
 use-sort = ["tvm-rt/use-sort"]
-use-nnpack = ["tvm-rt/use-nnpack"]
 use-random = ["tvm-rt/use-random"]
 use-micro-standalone-runtime = ["tvm-rt/use-micro-standalone-runtime"]
 use-cpp-rpc = ["tvm-rt/use-cpp-rpc"]
diff --git a/src/relay/op/nn/convolution.cc b/src/relay/op/nn/convolution.cc
index 547b533ccc9be..f70d0a48a9d03 100644
--- a/src/relay/op/nn/convolution.cc
+++ b/src/relay/op/nn/convolution.cc
@@ -1330,65 +1330,6 @@ weight transformation in advance.
.add_type_rel("Conv3DWinogradWeightTransform", Conv3DWinogradWeightTransformRel) .set_attr("TOpPattern", kOutEWiseFusable); -// relay.nn.contrib_conv2d_winograd_nnpack_weight_transform -TVM_REGISTER_NODE_TYPE(Conv2DWinogradNNPACKWeightTransformAttrs); - -bool Conv2DWinogradNNPACKWeightTransformRel(const Array& types, int num_inputs, - const Attrs& attrs, const TypeReporter& reporter) { - ICHECK_EQ(types.size(), 2); - const auto* data = types[0].as(); - if (data == nullptr) { - return false; - } - - const Conv2DWinogradNNPACKWeightTransformAttrs* param = - attrs.as(); - ICHECK(param != nullptr); - - ICHECK_EQ(data->shape.size(), 4) << "Only support NCHW normal kernel layout"; - - std::vector oshape{ - data->shape[0], - data->shape[1], - 8, - 8, - }; - - DataType out_dtype = param->out_dtype; - if (out_dtype.bits() == 0) { - out_dtype = data->dtype; - } - reporter->Assign(types[1], TensorType(Array(oshape), out_dtype)); - return true; -} - -Expr MakeConv2DWinogradNNPACKWeightTransform(Expr weight, int convolution_algorithm, - DataType out_dtype) { - auto attrs = make_object(); - attrs->convolution_algorithm = convolution_algorithm; - attrs->out_dtype = std::move(out_dtype); - static const Op& op = Op::Get("nn.contrib_conv2d_winograd_nnpack_weight_transform"); - return Call(op, {weight}, Attrs(attrs), {}); -} - -TVM_REGISTER_GLOBAL("relay.op.nn._make.contrib_conv2d_winograd_nnpack_weight_transform") - .set_body_typed(MakeConv2DWinogradNNPACKWeightTransform); - -RELAY_REGISTER_OP("nn.contrib_conv2d_winograd_nnpack_weight_transform") - .describe(R"code(Weight transformation of winograd fast convolution algorithm with NNPACK. -Separate this into another symbol in order to enable Precompute Pass to compute the -weight transformation in advance. - -- **weight**: (channels, in_channels, kernel_size[0], kernel_size[1]) - -)code" TVM_ADD_FILELINE) - .set_attrs_type() - .set_num_inputs(1) - .add_argument("weight", "Tensor", "The weight tensor.") - .set_support_level(10) - .add_type_rel("Conv2DWinogradNNPACKWeightTransform", Conv2DWinogradNNPACKWeightTransformRel) - .set_attr("TOpPattern", kOpaque); - // relay.nn.contrib_conv2d_gemm_without_weight_transform TVM_REGISTER_GLOBAL("relay.op.nn._make.contrib_conv2d_gemm_without_weight_transform") .set_body_typed([](Expr data, Expr weight, Array strides, Array padding, diff --git a/src/relay/transforms/to_mixed_precision.cc b/src/relay/transforms/to_mixed_precision.cc index 1112755b76a08..3dff3e4919ebd 100644 --- a/src/relay/transforms/to_mixed_precision.cc +++ b/src/relay/transforms/to_mixed_precision.cc @@ -160,8 +160,6 @@ class MixedPrecisionPass : public MixedModeMutator { return ModifyAttrsOutputDType(attrs, accumulation_dtype); } else if (auto attrs = cur_attrs.as()) { return ModifyAttrsOutputDType(attrs, accumulation_dtype); - } else if (auto attrs = cur_attrs.as()) { - return ModifyAttrsOutputDType(attrs, accumulation_dtype); } else if (auto attrs = cur_attrs.as()) { return ModifyAttrsOutputDType(attrs, accumulation_dtype); } else if (auto attrs = cur_attrs.as()) { diff --git a/src/runtime/contrib/nnpack/convolution.cc b/src/runtime/contrib/nnpack/convolution.cc deleted file mode 100644 index 2362e31f92eef..0000000000000 --- a/src/runtime/contrib/nnpack/convolution.cc +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file Use external nnpack library call.
- */
-#include
-#include
-#include
-#include
-#include
-
-#include "nnpack_utils.h"
-
-namespace tvm {
-namespace contrib {
-using namespace runtime;
-
-TVM_REGISTER_GLOBAL("tvm.contrib.nnpack.convolution_inference")
-    .set_body([](TVMArgs args, TVMRetValue* ret) {
-      NNPackThreadLocalEntry* entry = NNPackThreadLocalEntry::ThreadLocal();
-      static std::once_flag flag;
-      std::call_once(flag, []() { ICHECK_EQ(nnp_initialize(), nnp_status_success); });
-      DLTensor* input = args[0];
-      DLTensor* kernel = args[1];
-      DLTensor* bias = nullptr;
-      if (args[2].type_code() == kTVMDLTensorHandle) {
-        bias = args[2];
-      }
-      DLTensor* output = args[3];
-      uint64_t pad_top = args[4], pad_right = args[5], pad_bottom = args[6], pad_left = args[7];
-      nnp_padding input_padding{pad_top, pad_right, pad_bottom, pad_left};
-      uint64_t stride_width = args[8], stride_height = args[9];
-      nnp_size stride_size{stride_width, stride_height};
-      NNPackConfig(args[10]);
-
-      uint64_t algo_ = args[11];
-      nnp_convolution_algorithm algo = static_cast<nnp_convolution_algorithm>(algo_);
-      ICHECK_EQ(input->ndim, 4);
-      ICHECK_EQ(kernel->ndim, 4);
-      if (bias) {
-        ICHECK_EQ(bias->ndim, 1);
-      }
-      ICHECK_EQ(output->ndim, 4);
-      ICHECK_EQ(input->shape[1], kernel->shape[1]);
-      ICHECK_EQ(input->shape[0], output->shape[0]);
-      size_t input_channels = input->shape[1];
-      ICHECK_EQ(output->shape[1], kernel->shape[0]);
-      if (bias) {
-        ICHECK_EQ(output->shape[1], bias->shape[0]);
-      }
-      size_t output_channels = output->shape[1];
-      nnp_size input_size{static_cast<size_t>(input->shape[2]),
-                          static_cast<size_t>(input->shape[3])};
-      nnp_size kernel_size{static_cast<size_t>(kernel->shape[2]),
-                           static_cast<size_t>(kernel->shape[3])};
-      ICHECK(input->strides == nullptr);
-      ICHECK(kernel->strides == nullptr);
-      if (bias) {
-        ICHECK(bias->strides == nullptr);
-      }
-
-      ICHECK(TypeMatch(input->dtype, kDLFloat, 32));
-      ICHECK(TypeMatch(kernel->dtype, kDLFloat, 32));
-      if (bias) {
-        ICHECK(TypeMatch(bias->dtype, kDLFloat, 32));
-      }
-      ICHECK(TypeMatch(output->dtype, kDLFloat, 32));
-
-      // Allocate a zero-bias if we don't pass one in.
-      std::unique_ptr<std::vector<float>> zero_bias;
-      if (!bias) {
-        zero_bias.reset(new std::vector<float>(output->shape[1], 0.0));
-      }
-
-      size_t workspace_size = 0;
-      nnp_status status = nnp_convolution_inference(
-          algo, nnp_convolution_transform_strategy_compute, input_channels, output_channels,
-          input_size, input_padding, kernel_size, stride_size, nullptr, nullptr, nullptr, nullptr,
-          nullptr, &workspace_size, nnp_activation_identity, nullptr, entry->threadpool, nullptr);
-      ICHECK_EQ(status, nnp_status_success);
-
-      // Division with rounding up, in case size is not multiple of sizeof(float)
-      const size_t workspace_elements = (workspace_size + sizeof(float) - 1) / sizeof(float);
-
-      Device dev = input->device;
-      DLDataType type_hint = input->dtype;
-
-      DeviceAPI* cpu_api = DeviceAPI::Get(dev);
-      void* workspace_buffer =
-          cpu_api->AllocWorkspace(dev, workspace_elements * sizeof(float), type_hint);
-      ICHECK(workspace_buffer != nullptr);
-
-      for (auto n = 0; n < input->shape[0]; ++n) {
-        nnp_status status = nnp_convolution_inference(
-            algo, nnp_convolution_transform_strategy_compute, input_channels, output_channels,
-            input_size, input_padding, kernel_size, stride_size,
-            static_cast<float*>(input->data) +
-                n * input->shape[1] * input->shape[2] * input->shape[3],
-            static_cast<float*>(kernel->data),
-            bias ? static_cast<float*>(bias->data) : zero_bias->data(),
-            static_cast<float*>(output->data) +
-                n * output->shape[1] * output->shape[2] * output->shape[3],
-            workspace_buffer, &workspace_size, nnp_activation_identity, nullptr, entry->threadpool,
-            nullptr);
-
-        ICHECK_EQ(status, nnp_status_success);
-      }
-      cpu_api->FreeWorkspace(dev, workspace_buffer);
-    });
-
-TVM_REGISTER_GLOBAL("tvm.contrib.nnpack.convolution_inference_without_weight_transform")
-    .set_body([](TVMArgs args, TVMRetValue* ret) {
-      NNPackThreadLocalEntry* entry = NNPackThreadLocalEntry::ThreadLocal();
-      static std::once_flag flag;
-      std::call_once(flag, []() { ICHECK_EQ(nnp_initialize(), nnp_status_success); });
-      DLTensor* input = args[0];
-      DLTensor* transformed_kernel = args[1];
-      DLTensor* bias = nullptr;
-      if (args[2].type_code() == kTVMDLTensorHandle) {
-        bias = args[2];
-      }
-      DLTensor* output = args[3];
-      uint64_t pad_top = args[4], pad_right = args[5], pad_bottom = args[6], pad_left = args[7];
-      nnp_padding input_padding{pad_top, pad_right, pad_bottom, pad_left};
-      uint64_t stride_width = args[8], stride_height = args[9];
-      nnp_size stride_size{stride_width, stride_height};
-      NNPackConfig(args[10]);
-
-      uint64_t algo_ = args[11];
-      nnp_convolution_algorithm algo = static_cast<nnp_convolution_algorithm>(algo_);
-      ICHECK_EQ(input->ndim, 4);
-      if (bias) {
-        ICHECK_EQ(bias->ndim, 1);
-      }
-      ICHECK_EQ(output->ndim, 4);
-      ICHECK_EQ(input->shape[0], output->shape[0]);
-      size_t input_channels = input->shape[1];
-      if (bias) {
-        ICHECK_EQ(output->shape[1], bias->shape[0]);
-      }
-      size_t output_channels = output->shape[1];
-      nnp_size input_size{static_cast<size_t>(input->shape[2]),
-                          static_cast<size_t>(input->shape[3])};
-      nnp_size kernel_size{3, 3};
-      ICHECK(input->strides == nullptr);
-      ICHECK(transformed_kernel->strides == nullptr);
-      if (bias) {
-        ICHECK(bias->strides == nullptr);
-      }
-
-      ICHECK(TypeMatch(input->dtype, kDLFloat, 32));
-      ICHECK(TypeMatch(transformed_kernel->dtype, kDLFloat, 32));
-      if (bias) {
-        ICHECK(TypeMatch(bias->dtype, kDLFloat, 32));
-      }
-      ICHECK(TypeMatch(output->dtype, kDLFloat, 32));
-
-      // Allocate a zero-bias if we don't pass one in.
-      std::unique_ptr<std::vector<float>> zero_bias;
-      if (!bias) {
-        zero_bias.reset(new std::vector<float>(output->shape[1], 0.0));
-      }
-
-      size_t workspace_size = 0;
-      nnp_status status = nnp_convolution_inference(
-          algo, nnp_convolution_transform_strategy_reuse, input_channels, output_channels,
-          input_size, input_padding, kernel_size, stride_size, nullptr, nullptr, nullptr, nullptr,
-          nullptr, &workspace_size, nnp_activation_identity, nullptr, entry->threadpool, nullptr);
-      ICHECK_EQ(status, nnp_status_success);
-
-      // Division with rounding up, in case size is not multiple of sizeof(float)
-      const size_t workspace_elements = (workspace_size + sizeof(float) - 1) / sizeof(float);
-
-      Device dev = input->device;
-      DLDataType type_hint = input->dtype;
-
-      DeviceAPI* cpu_api = DeviceAPI::Get(dev);
-      void* workspace_buffer =
-          cpu_api->AllocWorkspace(dev, workspace_elements * sizeof(float), type_hint);
-      ICHECK(workspace_buffer != nullptr);
-
-      for (auto n = 0; n < input->shape[0]; ++n) {
-        nnp_status status = nnp_convolution_inference(
-            algo, nnp_convolution_transform_strategy_reuse, input_channels, output_channels,
-            input_size, input_padding, kernel_size, stride_size,
-            static_cast<float*>(input->data) +
-                n * input->shape[1] * input->shape[2] * input->shape[3],
-            static_cast<float*>(transformed_kernel->data),
-            bias ? static_cast<float*>(bias->data) : zero_bias->data(),
-            static_cast<float*>(output->data) +
-                n * output->shape[1] * output->shape[2] * output->shape[3],
-            workspace_buffer, &workspace_size, nnp_activation_identity, nullptr, entry->threadpool,
-            nullptr);
-        ICHECK_EQ(status, nnp_status_success);
-      }
-
-      cpu_api->FreeWorkspace(dev, workspace_buffer);
-    });
-
-TVM_REGISTER_GLOBAL("tvm.contrib.nnpack.convolution_inference_weight_transform")
-    .set_body([](TVMArgs args, TVMRetValue* ret) {
-      NNPackThreadLocalEntry* entry = NNPackThreadLocalEntry::ThreadLocal();
-      static std::once_flag flag;
-      std::call_once(flag, []() { ICHECK_EQ(nnp_initialize(), nnp_status_success); });
-      DLTensor* kernel = args[0];
-      DLTensor* transformed_kernel = args[1];
-      // Dummy sizes
-      nnp_padding input_padding{1, 1, 1, 1};
-      nnp_size stride_size{1, 1};
-
-      nnp_size input_size{100, 100};
-
-      NNPackConfig(args[2]);
-
-      uint64_t algo_ = args[3];
-      nnp_convolution_algorithm algo = static_cast<nnp_convolution_algorithm>(algo_);
-      ICHECK_EQ(kernel->ndim, 4);
-      size_t input_channels = kernel->shape[1];
-      size_t output_channels = kernel->shape[0];
-      ICHECK_EQ(kernel->shape[2], 3);
-      ICHECK_EQ(kernel->shape[3], 3);
-      nnp_size kernel_size{static_cast<size_t>(kernel->shape[2]),
-                           static_cast<size_t>(kernel->shape[3])};
-      ICHECK(kernel->strides == nullptr);
-      ICHECK(TypeMatch(kernel->dtype, kDLFloat, 32));
-
-      size_t transformed_kernel_size = 0;
-      nnp_status status;
-      status = nnp_convolution_inference(
-          algo, nnp_convolution_transform_strategy_precompute, input_channels, output_channels,
-          input_size, input_padding, kernel_size, stride_size, nullptr, nullptr, nullptr, nullptr,
-          nullptr, &transformed_kernel_size, nnp_activation_identity, nullptr, entry->threadpool,
-          nullptr);
-      ICHECK_EQ(status, nnp_status_success);
-
-      ICHECK_LE(transformed_kernel_size, GetDataSize(*transformed_kernel));
-
-      status = nnp_convolution_inference(
-          algo, nnp_convolution_transform_strategy_precompute, input_channels, output_channels,
-          input_size, input_padding, kernel_size, stride_size, nullptr,
-          static_cast<float*>(kernel->data), nullptr, nullptr,
-          static_cast<float*>(transformed_kernel->data), &transformed_kernel_size,
-          nnp_activation_identity, nullptr, entry->threadpool, nullptr);
-      ICHECK_EQ(status, nnp_status_success);
-    });
-}  // namespace contrib
-}  // namespace tvm
diff --git a/src/runtime/contrib/nnpack/fully_connected.cc b/src/runtime/contrib/nnpack/fully_connected.cc
deleted file mode 100644
index 28570026ada3e..0000000000000
--- a/src/runtime/contrib/nnpack/fully_connected.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file Use external nnpack library call.
- */
-#include
-#include
-#include
-#include
-
-#include "nnpack_utils.h"
-
-namespace tvm {
-namespace contrib {
-
-using namespace runtime;
-
-// matrix multiplication for row major
-TVM_REGISTER_GLOBAL("tvm.contrib.nnpack.fully_connected_inference")
-    .set_body([](TVMArgs args, TVMRetValue* ret) {
-      NNPackThreadLocalEntry* entry = NNPackThreadLocalEntry::ThreadLocal();
-      nnp_initialize();
-      DLTensor* A = args[0];
-      DLTensor* B = args[1];
-      DLTensor* C = args[2];
-      NNPackConfig(args[3]);
-
-      ICHECK_EQ(A->ndim, 1);
-      ICHECK_EQ(B->ndim, 2);
-      ICHECK_EQ(C->ndim, 1);
-      ICHECK_EQ(B->shape[0], C->shape[0]);
-      ICHECK_EQ(B->shape[1], A->shape[0]);
-      ICHECK(C->strides == nullptr);
-      ICHECK(B->strides == nullptr);
-      ICHECK(A->strides == nullptr);
-      ICHECK(TypeMatch(A->dtype, kDLFloat, 32));
-      ICHECK(TypeMatch(B->dtype, kDLFloat, 32));
-      ICHECK(TypeMatch(C->dtype, kDLFloat, 32));
-
-      nnp_fully_connected_inference(B->shape[1], B->shape[0], static_cast<float*>(A->data),
-                                    static_cast<float*>(B->data), static_cast<float*>(C->data),
-                                    entry->threadpool);
-    });
-
-}  // namespace contrib
-}  // namespace tvm
diff --git a/src/runtime/contrib/nnpack/nnpack_utils.cc b/src/runtime/contrib/nnpack/nnpack_utils.cc
deleted file mode 100644
index 2fd6f69bf20c4..0000000000000
--- a/src/runtime/contrib/nnpack/nnpack_utils.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file Use external nnpack library call.
- */
-#include "nnpack_utils.h"
-
-namespace tvm {
-namespace contrib {
-using namespace runtime;
-
-typedef dmlc::ThreadLocalStore<NNPackThreadLocalEntry> NNPackThreadLocalStore;
-
-NNPackThreadLocalEntry* NNPackThreadLocalEntry::ThreadLocal() {
-  return NNPackThreadLocalStore::Get();
-}
-
-bool NNPackConfig(uint64_t nthreads) {
-  NNPackThreadLocalEntry* entry = NNPackThreadLocalEntry::ThreadLocal();
-  if (entry->threadpool && pthreadpool_get_threads_count(entry->threadpool) == nthreads) {
-    ICHECK_NE(nthreads, 1);
-    return true;
-  }
-  if (entry->threadpool) {
-    pthreadpool_destroy(entry->threadpool);
-    entry->threadpool = nullptr;
-  }
-
-  if (nthreads == 1) {
-    // a null threadpool means the function is invoked on the calling thread,
-    // which is the desired logic for nthreads == 1
-    ICHECK(!entry->threadpool);
-    return true;
-  }
-
-  entry->threadpool = pthreadpool_create(nthreads);
-  return true;
-}
-
-TVM_REGISTER_GLOBAL("contrib.nnpack._initialize").set_body([](TVMArgs args, TVMRetValue* ret) {
-  *ret = nnp_initialize();
-});
-
-}  // namespace contrib
-}  // namespace tvm
diff --git a/src/runtime/contrib/nnpack/nnpack_utils.h b/src/runtime/contrib/nnpack/nnpack_utils.h
deleted file mode 100644
index ed0312dac4769..0000000000000
--- a/src/runtime/contrib/nnpack/nnpack_utils.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file Use external nnpack library call.
- */
-#ifndef TVM_RUNTIME_CONTRIB_NNPACK_NNPACK_UTILS_H_
-#define TVM_RUNTIME_CONTRIB_NNPACK_NNPACK_UTILS_H_
-#include
-#include
-#include
-#include
-#include
-
-namespace tvm {
-namespace contrib {
-
-struct NNPackThreadLocalEntry {
-  pthreadpool_t threadpool{nullptr};
-  static NNPackThreadLocalEntry* ThreadLocal();
-};
-
-bool NNPackConfig(uint64_t nthreads);
-}  // namespace contrib
-}  // namespace tvm
-#endif  // TVM_RUNTIME_CONTRIB_NNPACK_NNPACK_UTILS_H_
diff --git a/src/support/libinfo.cc b/src/support/libinfo.cc
index 73800338b1435..b6af61686a3b8 100644
--- a/src/support/libinfo.cc
+++ b/src/support/libinfo.cc
@@ -219,10 +219,6 @@
 #define TVM_INFO_USE_SORT "NOT-FOUND"
 #endif
 
-#ifndef TVM_INFO_USE_NNPACK
-#define TVM_INFO_USE_NNPACK "NOT-FOUND"
-#endif
-
 #ifndef TVM_INFO_USE_RANDOM
 #define TVM_INFO_USE_RANDOM "NOT-FOUND"
 #endif
@@ -351,7 +347,6 @@ TVM_DLL Map<String, String> GetLibInfo() {
       {"USE_MKL", TVM_INFO_USE_MKL},
       {"USE_MRVL", TVM_INFO_USE_MRVL},
       {"USE_MSVC_MT", TVM_INFO_USE_MSVC_MT},
-      {"USE_NNPACK", TVM_INFO_USE_NNPACK},
       {"USE_OPENCL", TVM_INFO_USE_OPENCL},
       {"USE_OPENCL_ENABLE_HOST_PTR", TVM_INFO_USE_OPENCL_ENABLE_HOST_PTR},
       {"USE_OPENCL_GTEST", TVM_INFO_USE_OPENCL_GTEST},
diff --git a/tests/python/contrib/test_nnpack.py b/tests/python/contrib/test_nnpack.py
deleted file mode 100644
index 881226725ac30..0000000000000
--- a/tests/python/contrib/test_nnpack.py
+++ /dev/null
@@ -1,220 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-import tvm
-import tvm.testing
-from tvm import te
-import numpy as np
-import scipy.signal
-from tvm.topi.nn.utils import get_pad_tuple
-from tvm.contrib import nnpack
-import pytest
-
-
-@tvm.testing.requires_llvm
-def test_fully_connected_inference():
-    n = 1024
-    l = 128
-    m = 235
-    bias = te.var("bias", dtype="float32")
-    A = te.placeholder((l,), name="A")
-    B = te.placeholder((m, l), name="B")
-    C = nnpack.fully_connected_inference(A, B)
-    D = te.compute(C.shape, lambda i: C[i] + bias, name="D")
-    s = te.create_schedule(D.op)
-
-    def verify(target="llvm"):
-        if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True):
-            pytest.skip("extern function is not available")
-        if not nnpack.is_available():
-            pytest.skip("nnpack is not available")
-
-        dev = tvm.cpu(0)
-        f = tvm.build(s, [A, B, D, bias], target)
-        a = tvm.nd.array(np.random.uniform(size=(l)).astype(A.dtype), dev)
-        b = tvm.nd.array(np.random.uniform(size=(m, l)).astype(B.dtype), dev)
-        d = tvm.nd.array(np.zeros((m,), dtype=D.dtype), dev)
-        bb = 10.0
-        f(a, b, d, bb)
-        tvm.testing.assert_allclose(d.numpy(), np.dot(a.numpy(), b.numpy().T) + bb, rtol=1e-5)
-
-    verify()
-
-
-def np_conv(na, nw, padding, stride=1):
-    batch, in_channel, in_height, in_width = na.shape
-    _, num_filter, kernel_h, kernel_w = nw.shape
-    if isinstance(stride, int):
-        stride_h = stride_w = stride
-    else:
-        stride_h, stride_w = stride
-
-    pad_top, pad_left, pad_bottom, pad_right = get_pad_tuple(padding, (kernel_h, kernel_w))
-    pad_h = pad_top + pad_bottom
-    pad_w = pad_left + pad_right
-
-    out_channel = num_filter
-    out_height = (in_height - kernel_h + pad_h) // stride_h + 1
-    out_width = (in_width - kernel_w + pad_w) // stride_w + 1
-    nb = np.zeros((batch, out_channel, out_height, out_width))
-    for n in range(batch):
-        for f in range(out_channel):
-            for c in range(in_channel):
-                if pad_h > 0 or pad_w > 0:
-                    apad = np.zeros((in_height + pad_h, in_width + pad_w))
-                    apad[pad_top : pad_top + in_height, pad_left : pad_left + in_width] = na[n, c]
-                else:
-                    apad = na[n, c]
-                out = scipy.signal.convolve2d(apad, np.rot90(np.rot90(nw[f, c])), mode="valid")
-                nb[n, f] += out[::stride, ::stride]
-    return nb
-
-
-@tvm.testing.requires_llvm
-def test_convolution_inference():
-    BATCH = 8
-    IH = 48
-    IW = 48
-    IC = 16
-    OC = 16
-    K = 3
-    PAD = 1
-    STRIDE = 1
-
-    OH = (IH + 2 * PAD - K) + 1
-    OW = (IW + 2 * PAD - K) + 1
-    dshape = (BATCH, IC, IH, IW)
-    kshape = (OC, IC, K, K)
-    bshape = (OC,)
-    oshape = (BATCH, OC, OH, OW)
-
-    data = te.placeholder(dshape, name="data")
-    kernel = te.placeholder(kshape, name="kernel")
-    bias = te.placeholder(bshape, name="bias")
-
-    def verify(target="llvm", algorithm=nnpack.ConvolutionAlgorithm.AUTO, with_bias=True):
-        if not tvm.get_global_func("tvm.contrib.nnpack.fully_connected_inference", True):
-            pytest.skip("extern function is not available")
-        if not nnpack.is_available():
-            pytest.skip("nnpack is not available")
-
-        dev = tvm.cpu(0)
-        output = nnpack.convolution_inference(
-            data,
-            kernel,
-            bias if with_bias else None,
-            [PAD, PAD, PAD, PAD],
-            [STRIDE, STRIDE],
-            algorithm=algorithm,
-        )
-        s = te.create_schedule(output.op)
-
-        f = tvm.build(s, [data, kernel, bias, output], target)
-
-        na = np.random.uniform(size=dshape).astype(data.dtype)
-        nb = np.random.uniform(size=kshape).astype(kernel.dtype)
-        nc = np.zeros(bshape, dtype=bias.dtype)
-        ta = tvm.nd.array(na, dev)
-        tb = tvm.nd.array(nb, dev)
-        tc = tvm.nd.array(nc, dev)
-        td = tvm.nd.array(np.zeros(oshape, dtype=output.dtype), dev)
-        f(ta, tb, tc, td)
-        nd = np_conv(np.reshape(na, (BATCH, IC, IH, IW)), nb, PAD, STRIDE) + nc.reshape(
-            1, bshape[0], 1, 1
-        )
-        tvm.testing.assert_allclose(td.numpy(), nd.reshape(BATCH, OC, OH, OW), rtol=1e-5)
-
-    for algorithm in [
-        nnpack.ConvolutionAlgorithm.AUTO,
-        nnpack.ConvolutionAlgorithm.FFT_8x8,
-        nnpack.ConvolutionAlgorithm.FFT_16x16,
-        nnpack.ConvolutionAlgorithm.WT_8x8,
-        nnpack.ConvolutionAlgorithm.IMPLICIT_GEMM,
-        nnpack.ConvolutionAlgorithm.WT_8x8_FP16,
-    ]:
-        for with_bias in [True, False]:
-            verify(algorithm=algorithm, with_bias=with_bias)
-
-
-@tvm.testing.requires_llvm
-def test_convolution_inference_without_weight_transform():
-    BATCH = 6
-    IH = 48
-    IW = 48
-    IC = 16
-    OC = 16
-    K = 3
-    PAD = 1
-    STRIDE = 1
-
-    OH = (IH + 2 * PAD - K) + 1
-    OW = (IW + 2 * PAD - K) + 1
-    dshape = (BATCH, IC, IH, IW)
-    kshape = (OC, IC, K, K)
-    bshape = (OC,)
-    oshape = (BATCH, OC, OH, OW)
-
-    data = te.placeholder(dshape, name="data")
-    kernel = te.placeholder(kshape, name="kernel")
-    bias = te.placeholder(bshape, name="bias")
-
-    def verify(target="llvm", algorithm=nnpack.ConvolutionAlgorithm.AUTO, with_bias=True):
-        if not tvm.get_global_func(
-            "tvm.contrib.nnpack.convolution_inference_without_weight_transform", True
-        ):
-            pytest.skip("extern function is not available")
-        if not nnpack.is_available():
-            pytest.skip("nnpack is not available")
-
-        dev = tvm.cpu(0)
-        transformed_kernel = nnpack.convolution_inference_weight_transform(
-            kernel, algorithm=algorithm
-        )
-        output = nnpack.convolution_inference_without_weight_transform(
-            data,
-            transformed_kernel,
-            bias if with_bias else None,
-            [PAD, PAD, PAD, PAD],
-            [STRIDE, STRIDE],
-            algorithm=algorithm,
-        )
-
-        s = te.create_schedule(output.op)
-
-        f = tvm.build(s, [data, kernel, bias, output], target)
-
-        na = np.random.uniform(size=dshape).astype(data.dtype)
-        nb = np.random.uniform(size=kshape).astype(kernel.dtype)
-        nc = (
-            np.random.uniform(size=bshape).astype(bias.dtype)
-            if with_bias
-            else np.zeros(bshape, dtype=bias.dtype)
-        )
-        ta = tvm.nd.array(na, dev)
-        tb = tvm.nd.array(nb, dev)
-        tc = tvm.nd.array(nc, dev)
-        td = tvm.nd.array(np.zeros(oshape, dtype=output.dtype), dev)
-        f(ta, tb, tc, td)
-        nd = np_conv(np.reshape(na, (BATCH, IC, IH, IW)), nb, PAD, STRIDE) + nc.reshape(
-            1, bshape[0], 1, 1
-        )
-        tvm.testing.assert_allclose(td.numpy(), nd.reshape(BATCH, OC, OH, OW), rtol=1e-5)
-
-    for algorithm in [nnpack.ConvolutionAlgorithm.WT_8x8]:
-        for with_bias in [True, False]:
-            verify(algorithm=algorithm, with_bias=with_bias)
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
diff --git a/tests/python/integration/test_winograd_nnpack.py b/tests/python/integration/test_winograd_nnpack.py
deleted file mode 100644
index d53dc21d63288..0000000000000
--- a/tests/python/integration/test_winograd_nnpack.py
+++ /dev/null
@@ -1,186 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Test winograd convolution using the NNPACK implementation."""
-import numpy as np
-from pytest import skip
-
-import tvm
-import tvm.testing
-import tvm.topi.testing
-from tvm import autotvm, te, topi
-from tvm.autotvm.task.space import FallbackConfigEntity
-from tvm.contrib import nnpack
-from tvm.contrib.pickle_memoize import memoize
-from tvm.topi.utils import get_const_tuple
-
-
-def verify_conv2d_nchw(
-    batch,
-    in_channel,
-    in_size,
-    num_filter,
-    kernel,
-    stride,
-    padding,
-    devices,
-    dilation=1,
-    add_bias=False,
-    add_relu=False,
-):
-    """Verify conv2d nchw workload."""
-    print(
-        "Workload: (%d, %d, %d, %d, %d, %d, %d, %d)"
-        % (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation)
-    )
-
-    in_height = in_width = in_size
-
-    placeholder_a = te.placeholder((batch, in_channel, in_height, in_width), name="A")
-    placeholder_w = te.placeholder((num_filter, in_channel, kernel, kernel), name="W")
-    bias = te.placeholder((num_filter, 1, 1), name="bias")
-
-    a_shape = get_const_tuple(placeholder_a.shape)
-    w_shape = get_const_tuple(placeholder_w.shape)
-    bias_shape = get_const_tuple(bias.shape)
-    dtype = placeholder_a.dtype
-
-    @memoize("topi.tests.test_topi_conv2d_nchw.verify_conv2d_nchw")
-    def get_ref_data():
-        a_np = np.random.uniform(size=a_shape).astype(dtype)
-        w_np = np.random.uniform(size=w_shape).astype(dtype)
-        b_np = np.random.uniform(size=bias_shape).astype(dtype)
-        dw_np = tvm.topi.testing.dilate_python(w_np, (1, 1, dilation, dilation))
-        c_np = tvm.topi.testing.conv2d_nchw_python(a_np, dw_np, stride, padding)
-        if add_bias:
-            c_np += b_np
-        if add_relu:
-            c_np = np.maximum(c_np, 0)
-        return a_np, w_np, b_np, c_np
-
-    a_np, w_np, b_np, c_np = get_ref_data()
-
-    def check_device(device):
-        dev = tvm.device(device, 0)
-        if not tvm.testing.device_enabled(device):
-            print("Skipping %s because it is not enabled" % device)
-            return
-        print("Running on target: %s" % device)
-        with tvm.target.Target(device):
-            result_c = topi.nn.conv2d(
-                placeholder_a,
-                placeholder_w,
-                stride,
-                padding,
-                dilation,
-                data_layout="NCHW",
-                out_dtype=dtype,
-            )
-            if add_bias:
-                result_c = topi.add(result_c, bias)
-            if add_relu:
-                result_c = topi.nn.relu(result_c)
-            schedule = topi.generic.schedule_conv2d_nchw([result_c])
-
-        buff_a = tvm.nd.array(a_np, dev)
-        buff_w = tvm.nd.array(w_np, dev)
-        buff_b = tvm.nd.array(b_np, dev)
-        buff_c = tvm.nd.array(np.zeros(get_const_tuple(result_c.shape), dtype=result_c.dtype), dev)
-        if add_bias:
-            func = tvm.build(
-                schedule,
-                [placeholder_a, placeholder_w, bias, result_c],
-                device,
-                name="relu_%d_%d_%d_%d_%d_%d_%d_%d"
-                % (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation),
-            )
-            func(buff_a, buff_w, buff_b, buff_c)
-        else:
-            func = tvm.build(
-                schedule,
-                [placeholder_a, placeholder_w, result_c],
-                device,
-                name="relu_%d_%d_%d_%d_%d_%d_%d_%d"
-                % (batch, in_channel, in_size, num_filter, kernel, stride, padding, dilation),
-            )
-            func(buff_a, buff_w, buff_c)
-        tvm.testing.assert_allclose(buff_c.numpy(), c_np, rtol=1e-4)
-
-    for device in devices:
-        check_device(device)
-
-
-class WinogradFallback(autotvm.FallbackContext):
-    """Winograd fallbacks."""
-
-    def _query_inside(self, target, workload):
-        key = (target, workload)
-        if key in self.memory:
-            return self.memory[key]
-        cfg = FallbackConfigEntity()
-        cfg.template_key = "winograd_nnpack_fp32"
-        self.memory[key] = cfg
-        return cfg
-
-
-def test_conv2d_nchw():
-    """Verify conv2d nchw winograd works."""
-
-    if not tvm.get_global_func(
-        "tvm.contrib.nnpack.convolution_inference_without_weight_transform", True
-    ):
-        skip("extern function is not available")
-
-    if not nnpack.is_available():
-        skip("nnpack is not available")
-
-    devices = ["llvm -device=arm_cpu"]
-    autotvm.GLOBAL_SCOPE.silent = True
-    with WinogradFallback():
-        # resnet 18 workloads
-        verify_conv2d_nchw(1, 64, 56, 64, 3, 1, 1, devices=devices)
-        verify_conv2d_nchw(1, 128, 28, 128, 3, 1, 1, devices=devices)
-        verify_conv2d_nchw(1, 256, 14, 256, 3, 1, 1, devices=devices)
-        verify_conv2d_nchw(1, 512, 7, 512, 3, 1, 1, devices=devices)
-
-        # unet workloads
-        verify_conv2d_nchw(1, 3, 192, 12, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 4, 192, 12, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 12, 96, 24, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 24, 48, 48, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 48, 24, 96, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 96, 12, 180, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 180, 6, 220, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 220, 6, 180, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 180, 12, 96, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 96, 24, 48, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 48, 48, 24, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 24, 96, 12, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 12, 192, 1, 3, 1, 1, add_bias=True, devices=devices)
-
-        # relu, bias
-        verify_conv2d_nchw(1, 64, 56, 64, 3, 1, 1, add_bias=True, devices=devices)
-        verify_conv2d_nchw(1, 64, 56, 64, 3, 1, 1, add_relu=True, devices=devices)
-        verify_conv2d_nchw(1, 64, 56, 64, 3, 1, 1, add_relu=True, add_bias=True, devices=devices)
-
-        # weird workloads
-        verify_conv2d_nchw(1, 3, 3, 3, 3, 1, 1, devices=devices)
-        verify_conv2d_nchw(1, 13, 71, 59, 3, 1, 1, devices=devices)
-    autotvm.GLOBAL_SCOPE.silent = False
-
-
-if __name__ == "__main__":
-    tvm.testing.main()
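
Downstream code that previously imported tvm.contrib.nnpack or probed the packed
functions exercised by the deleted tests can detect the removal at runtime. A
minimal sketch, not part of this patch, using only the existing
tvm.get_global_func (with allow_missing=True) and tvm.support.libinfo APIs:

    import tvm
    import tvm.support

    # The NNPACK runtime used to register packed functions under this prefix;
    # with allow_missing=True the lookup returns None instead of raising.
    func = tvm.get_global_func("tvm.contrib.nnpack.convolution_inference", allow_missing=True)
    if func is None:
        print("This TVM build has no NNPACK runtime; use the native topi schedules instead.")

    # After this patch the build-info dict no longer carries a USE_NNPACK entry.
    assert "USE_NNPACK" not in tvm.support.libinfo()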