From 878d0df8952456fb8e771dd40cc7587069a82fda Mon Sep 17 00:00:00 2001
From: Alexander Nesterov
Date: Mon, 15 Jan 2024 12:24:46 +0100
Subject: [PATCH] new fix of nhwc layout

---
 .../src/nodes/executors/acl/acl_deconv.cpp    | 66 +++++++++----------
 .../src/nodes/executors/acl/acl_deconv.hpp    |  6 +-
 2 files changed, 31 insertions(+), 41 deletions(-)

diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
index 69ce589f9667fd..cee4137b6727fa 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
@@ -15,12 +15,7 @@ ACLDeconvTensorInfo getACLDeconvTensorInfo(const DeconvAttrs& deconvAttrs,
                                            const std::vector<MemoryDescPtr>& dstDescs) {
     auto srcDims = srcDescs[0]->getShape().getDims();
     auto weiDims = srcDescs[1]->getShape().getDims();
-//    std::cout << (srcDescs[0]->hasLayoutType(LayoutType::nspc) ? "nhwc" : "nchw") << std::endl;
-//    std::cout << weiDims[0] << " | " << weiDims[1] << " | " << weiDims[2] << " | " << weiDims[3] << " | " << std::endl;
-    // swap input and output channels dimensions to be align with ACL
-    // weights tensor shape is changed because ACL expects [O, I, H, W] tensor while OV uses [I, O, H, W] tensor
     std::swap(weiDims[0], weiDims[1]);
-//    std::cout << weiDims[0] << " | " << weiDims[1] << " | " << weiDims[2] << " | " << weiDims[3] << " | " << std::endl;
     auto dstDims = dstDescs[0]->getShape().getDims();
 
     arm_compute::TensorShape srcVecDims = shapeCast(srcDims);
@@ -45,8 +40,6 @@
         }
     }
 
-    std::cout << weiVecDims[0] << " | " << weiVecDims[1] << " | " << weiVecDims[2] << " | " << weiVecDims[3] << " | " << std::endl;
-    std::cout << weiVecDims[0] << " ======================== " << std::endl;
     TensorInfo srcTensorInfo = TensorInfo(srcVecDims, 1, precisionToAclDataType(srcDescs[0]->getPrecision()),
                                           getAclDataLayoutByMemoryDesc(srcDescs[0]));
     TensorInfo weiTensorInfo = TensorInfo(weiVecDims, 1,
@@ -78,6 +71,7 @@ bool AclDeconvExecutor::init(const DeconvAttrs& deconvAttrs,
                              const std::vector<MemoryDescPtr>& srcDescs,
                              const std::vector<MemoryDescPtr>& dstDescs,
                              const dnnl::primitive_attr &attr) {
+    this->weiLayoutType = srcDescs[1]->hasLayoutType(LayoutType::nspc) ? LayoutType::nspc : LayoutType::ncsp;
     this->deconvAttrs = deconvAttrs;
     ACLDeconvTensorInfo aclDeconvTensorInfo = getACLDeconvTensorInfo(deconvAttrs, srcDescs, dstDescs);
     TensorInfo srcTensorInfo = aclDeconvTensorInfo.srcTensorInfo;
@@ -113,7 +107,7 @@ bool AclDeconvExecutor::init(const DeconvAttrs& deconvAttrs,
     return true;
 }
 
-static void transpose_to_1023(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
+static void transpose_ncsp(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
     const auto src_data = reinterpret_cast<float*>(srcMemPtr->getData());
 
     const int DIM0 = srcMemPtr->getStaticDims()[0];
@@ -137,37 +131,37 @@ static void transpose_to_1023(const MemoryCPtr& srcMemPtr, std::vector<float>& d
     });
 }
-//static void transpose_to_0231(VectorDims new_dims, std::vector<float>& dst_data) {
-//    const unsigned long DIM0 = new_dims[0];
-//    const unsigned long DIM1 = new_dims[1];
-//    const unsigned long DIM2 = new_dims[2];
-//    const unsigned long DIM3 = new_dims[3];
-//
-//    parallel_for3d(DIM0, DIM1, DIM2, [&](unsigned long dim0, unsigned long dim1, unsigned long dim2) {
-//        for (int dim3 = 0; dim3 < DIM3; ++dim3) {
-//            unsigned long src_off = dim0 * DIM1 * DIM2 * DIM3 +
-//                                    dim1 * DIM2 * DIM3 +
-//                                    dim2 * DIM3 +
-//                                    dim3;
-//            unsigned long dst_off = dim0 * DIM2 * DIM3 * DIM1 +
-//                                    dim2 * DIM3 * DIM1 +
-//                                    dim3 * DIM1 +
-//                                    dim1;
-//
-//            std::swap(dst_data[dst_off], dst_data[src_off]);
-//        }
-//    });
-//}
+static void transpose_nspc(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
+    const auto src_data = reinterpret_cast<float*>(srcMemPtr->getData());
+
+    const int DIM0 = srcMemPtr->getStaticDims()[0];
+    const int DIM1 = srcMemPtr->getStaticDims()[1];
+    const int DIM2 = srcMemPtr->getStaticDims()[2];
+    const int DIM3 = srcMemPtr->getStaticDims()[3];
+
+    parallel_for3d(DIM0, DIM1, DIM2, [&](const int dim0, const int dim1, const int dim2) {
+        for (int dim3 = 0; dim3 < DIM3; ++dim3) {
+            const int src_off = dim0 * DIM2 * DIM3 * DIM1 +
+                                dim2 * DIM3 * DIM1 +
+                                dim3 * DIM1 +
+                                dim1;
+            const int dst_off = dim1 * DIM2 * DIM3 * DIM0 +
+                                dim2 * DIM3 * DIM0 +
+                                dim3 * DIM0 +
+                                dim0;
+
+            dst_data[dst_off] = src_data[src_off];
+        }
+    });
+}
 
 void AclDeconvExecutor::exec(const std::vector<MemoryCPtr>& src,
                              const std::vector<MemoryPtr>& dst,
                              const void *post_ops_data_) {
     // TODO: Remove transpose from exec
-    transpose_to_1023(src[1], weiBuffer);
-//    std::cout << src[1]->getShape().getDims()[0] << " * " <<
-//                 src[1]->getShape().getDims()[1] << " * " <<
-//                 src[1]->getShape().getDims()[2] << " * " <<
-//                 src[1]->getShape().getDims()[3] << " * " << std::endl;
-//    VectorDims vec_dims = {12, 6, 3, 3};
-//    transpose_to_0231(vec_dims, weiBuffer);
+    if (weiLayoutType == LayoutType::ncsp) {
+        transpose_ncsp(src[1], weiBuffer);
+    } else if (weiLayoutType == LayoutType::nspc) {
+        transpose_nspc(src[1], weiBuffer);
+    }
 
     srcTensor.allocator()->import_memory(src[0]->getData());
     dstTensor.allocator()->import_memory(dst[0]->getData());
diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.hpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.hpp
index 34ecac01c2a89f..b678fa42785cdc 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.hpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.hpp
@@ -43,6 +43,7 @@ class AclDeconvExecutor : public DeconvExecutor {
 
     DeconvAttrs deconvAttrs;
     impl_desc_type implType = impl_desc_type::gemm_acl;
+    LayoutType weiLayoutType;
     arm_compute::Tensor srcTensor;
     arm_compute::Tensor weiTensor;
     arm_compute::Tensor biasTensor;
@@ -67,11 +68,6 @@ class AclDeconvExecutorBuilder : public DeconvExecutorBuilder {
     DeconvExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const override {
         return std::make_shared<AclDeconvExecutor>(context);
     }
-
-private:
-    static bool validate_deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
-                                                         unsigned int kernel_width, unsigned int kernel_height,
-                                                         const arm_compute::PadStrideInfo &pad_stride_info);
 };
 
 }   // namespace intel_cpu
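
Note (not part of the patch, appended for review): the snippet below replays the index arithmetic of the new transpose_nspc helper on a toy tensor so the NHWC weight handling can be checked in isolation. It is a minimal sketch under stated assumptions: the dims I/O/H/W, the main() harness, and the plain nested loops standing in for parallel_for3d are illustrative only; just the src_off/dst_off formulas are taken from the patch. The intent they encode: OV deconvolution weights are logically [I, O, H, W]; with an nspc (channels-last) descriptor they sit in memory as [I][H][W][O], and weiBuffer has to come out as [O][H][W][I], i.e. channels-last storage of the swapped [O, I, H, W] shape that the ACL TensorInfo describes.

#include <cstdio>
#include <vector>

int main() {
    // Toy logical OV deconvolution weight shape [I, O, H, W]
    // (I = DIM0, O = DIM1, H = DIM2, W = DIM3 in the patch); dims are assumptions.
    const int I = 2, O = 3, H = 2, W = 2;
    const int total = I * O * H * W;

    // Source: weights described as nspc (channels-last), so memory order is [I][H][W][O].
    std::vector<float> src(total);
    for (int n = 0; n < total; ++n)
        src[n] = static_cast<float>(n);  // distinct values make the remapping visible

    // Destination: what weiBuffer should contain after transpose_nspc --
    // channels-last storage of the swapped [O, I, H, W] shape, i.e. [O][H][W][I].
    std::vector<float> dst(total);

    for (int i = 0; i < I; ++i)                // dim0
        for (int o = 0; o < O; ++o)            // dim1
            for (int h = 0; h < H; ++h)        // dim2
                for (int w = 0; w < W; ++w) {  // dim3
                    // Offset formulas copied from transpose_nspc in the patch.
                    const int src_off = i * H * W * O + h * W * O + w * O + o;
                    const int dst_off = o * H * W * I + h * W * I + w * I + i;
                    dst[dst_off] = src[src_off];
                }

    // Spot check: the same logical element, addressed through both layouts,
    // carries the same value after the copy.
    const int ci = 1, co = 2, ch = 0, cw = 1;
    std::printf("src(i=1,o=2,h=0,w=1) = %.0f, dst(o=2,i=1,h=0,w=1) = %.0f\n",
                src[ci * H * W * O + ch * W * O + cw * O + co],
                dst[co * H * W * I + ch * W * I + cw * I + ci]);
    return 0;
}

With these toy dims both printed values are 17: the element keeps its value, only its offset changes, which is the behaviour the exec() dispatch relies on when it picks transpose_ncsp or transpose_nspc from weiLayoutType.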