New fix for NHWC layout
allnes committed Jan 15, 2024
1 parent e27e23a commit 878d0df
Showing 2 changed files with 31 additions and 41 deletions.
66 changes: 30 additions & 36 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp
@@ -15,12 +15,7 @@ ACLDeconvTensorInfo getACLDeconvTensorInfo(const DeconvAttrs& deconvAttrs,
const std::vector<MemoryDescPtr>& dstDescs) {
auto srcDims = srcDescs[0]->getShape().getDims();
auto weiDims = srcDescs[1]->getShape().getDims();
// std::cout << (srcDescs[0]->hasLayoutType(LayoutType::nspc) ? "nhwc" : "nchw") << std::endl;
// std::cout << weiDims[0] << " | " << weiDims[1] << " | " << weiDims[2] << " | " << weiDims[3] << " | " << std::endl;
// swap input and output channels dimensions to be align with ACL
// weights tensor shape is changed because ACL expects [O, I, H, W] tensor while OV uses [I, O, H, W] tensor
std::swap(weiDims[0], weiDims[1]);
// std::cout << weiDims[0] << " | " << weiDims[1] << " | " << weiDims[2] << " | " << weiDims[3] << " | " << std::endl;
auto dstDims = dstDescs[0]->getShape().getDims();

arm_compute::TensorShape srcVecDims = shapeCast(srcDims);
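
The comment above keeps the reasoning for the swap: ACL expects weights as [O, I, H, W] while OV stores them as [I, O, H, W], so only the first two logical dims trade places before the ACL shapes are built. A minimal standalone sketch of that effect, with made-up sizes, would be:

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

int main() {
    // Hypothetical deconvolution weights dims in the OV order [I, O, H, W];
    // the sizes are made up for illustration only.
    std::vector<std::size_t> weiDims = {6, 12, 3, 3};

    // The same swap as in getACLDeconvTensorInfo: ACL expects [O, I, H, W].
    std::swap(weiDims[0], weiDims[1]);

    for (auto d : weiDims)
        std::cout << d << ' ';      // prints: 12 6 3 3
    std::cout << '\n';
}
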
@@ -45,8 +40,6 @@ ACLDeconvTensorInfo getACLDeconvTensorInfo(const DeconvAttrs& deconvAttrs,
}
}

std::cout << weiVecDims[0] << " | " << weiVecDims[1] << " | " << weiVecDims[2] << " | " << weiVecDims[3] << " | " << std::endl;
std::cout << weiVecDims[0] << " ======================== " << std::endl;
TensorInfo srcTensorInfo = TensorInfo(srcVecDims, 1,
precisionToAclDataType(srcDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[0]));
TensorInfo weiTensorInfo = TensorInfo(weiVecDims, 1,
@@ -78,6 +71,7 @@ bool AclDeconvExecutor::init(const DeconvAttrs& deconvAttrs,
const std::vector<MemoryDescPtr>& srcDescs,
const std::vector<MemoryDescPtr>& dstDescs,
const dnnl::primitive_attr &attr) {
this->weiLayoutType = srcDescs[1]->hasLayoutType(LayoutType::nspc) ? LayoutType::nspc : LayoutType::ncsp;
this->deconvAttrs = deconvAttrs;
ACLDeconvTensorInfo aclDeconvTensorInfo = getACLDeconvTensorInfo(deconvAttrs, srcDescs, dstDescs);
TensorInfo srcTensorInfo = aclDeconvTensorInfo.srcTensorInfo;
Expand Down Expand Up @@ -113,7 +107,7 @@ bool AclDeconvExecutor::init(const DeconvAttrs& deconvAttrs,
return true;
}

static void transpose_to_1023(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
static void transpose_ncsp(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
const auto src_data = reinterpret_cast<float*>(srcMemPtr->getData());

const int DIM0 = srcMemPtr->getStaticDims()[0];
@@ -137,37 +131,37 @@ static void transpose_to_1023(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
});
}

//static void transpose_to_0231(VectorDims new_dims, std::vector<float>& dst_data) {
// const unsigned long DIM0 = new_dims[0];
// const unsigned long DIM1 = new_dims[1];
// const unsigned long DIM2 = new_dims[2];
// const unsigned long DIM3 = new_dims[3];
//
// parallel_for3d(DIM0, DIM1, DIM2, [&](unsigned long dim0, unsigned long dim1, unsigned long dim2) {
// for (int dim3 = 0; dim3 < DIM3; ++dim3) {
// unsigned long src_off = dim0 * DIM1 * DIM2 * DIM3 +
// dim1 * DIM2 * DIM3 +
// dim2 * DIM3 +
// dim3;
// unsigned long dst_off = dim0 * DIM2 * DIM3 * DIM1 +
// dim2 * DIM3 * DIM1 +
// dim3 * DIM1 +
// dim1;
//
// std::swap(dst_data[dst_off], dst_data[src_off]);
// }
// });
//}
static void transpose_nspc(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
const auto src_data = reinterpret_cast<float*>(srcMemPtr->getData());

const int DIM0 = srcMemPtr->getStaticDims()[0];
const int DIM1 = srcMemPtr->getStaticDims()[1];
const int DIM2 = srcMemPtr->getStaticDims()[2];
const int DIM3 = srcMemPtr->getStaticDims()[3];

parallel_for3d(DIM0, DIM1, DIM2, [&](const int dim0, const int dim1, const int dim2) {
for (int dim3 = 0; dim3 < DIM3; ++dim3) {
const int src_off = dim0 * DIM2 * DIM3 * DIM1 +
dim2 * DIM3 * DIM1 +
dim3 * DIM1 +
dim1;
const int dst_off = dim1 * DIM2 * DIM3 * DIM0 +
dim2 * DIM3 * DIM0 +
dim3 * DIM0 +
dim0;

dst_data[dst_off] = src_data[src_off];
}
});
}

void AclDeconvExecutor::exec(const std::vector<MemoryCPtr>& src, const std::vector<MemoryPtr>& dst, const void *post_ops_data_) {
// TODO: Remove transpose from exec
transpose_to_1023(src[1], weiBuffer);
// std::cout << src[1]->getShape().getDims()[0] << " * " <<
// src[1]->getShape().getDims()[1] << " * " <<
// src[1]->getShape().getDims()[2] << " * " <<
// src[1]->getShape().getDims()[3] << " * " << std::endl;
// VectorDims vec_dims = {12, 6, 3, 3};
// transpose_to_0231(vec_dims, weiBuffer);
if (weiLayoutType == LayoutType::ncsp) {
transpose_ncsp(src[1], weiBuffer);
} else if (weiLayoutType == LayoutType::nspc) {
transpose_nspc(src[1], weiBuffer);
}

srcTensor.allocator()->import_memory(src[0]->getData());
dstTensor.allocator()->import_memory(dst[0]->getData());
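
For reference, exec() now picks the weight reorder from the layout captured in init() (weiLayoutType) instead of always calling the old transpose_to_1023. The standalone sketch below mirrors the same index arithmetic on plain float buffers with hypothetical sizes; it is only an illustration, not the plugin's MemoryCPtr/parallel_for3d code, and the ncsp formula assumes the 1-0-2-3 permutation implied by the original transpose_to_1023 name, since that function body is collapsed in the diff.

#include <cstddef>
#include <vector>

// ncsp (plain) weights: physically [DIM0, DIM1, DIM2, DIM3]; swap the first two dims
// (the 1-0-2-3 permutation assumed for transpose_ncsp).
static void reorder_ncsp(const float* src, float* dst, int DIM0, int DIM1, int DIM2, int DIM3) {
    for (int d0 = 0; d0 < DIM0; ++d0)
        for (int d1 = 0; d1 < DIM1; ++d1)
            for (int d2 = 0; d2 < DIM2; ++d2)
                for (int d3 = 0; d3 < DIM3; ++d3) {
                    const int src_off = ((d0 * DIM1 + d1) * DIM2 + d2) * DIM3 + d3;
                    const int dst_off = ((d1 * DIM0 + d0) * DIM2 + d2) * DIM3 + d3;
                    dst[dst_off] = src[src_off];
                }
}

// nspc weights: physically [DIM0, DIM2, DIM3, DIM1]; written out as [DIM1, DIM2, DIM3, DIM0],
// matching the src_off / dst_off formulas of transpose_nspc in the diff.
static void reorder_nspc(const float* src, float* dst, int DIM0, int DIM1, int DIM2, int DIM3) {
    for (int d0 = 0; d0 < DIM0; ++d0)
        for (int d1 = 0; d1 < DIM1; ++d1)
            for (int d2 = 0; d2 < DIM2; ++d2)
                for (int d3 = 0; d3 < DIM3; ++d3) {
                    const int src_off = d0 * DIM2 * DIM3 * DIM1 + d2 * DIM3 * DIM1 + d3 * DIM1 + d1;
                    const int dst_off = d1 * DIM2 * DIM3 * DIM0 + d2 * DIM3 * DIM0 + d3 * DIM0 + d0;
                    dst[dst_off] = src[src_off];
                }
}

int main() {
    const int I = 6, O = 12, H = 3, W = 3;                 // hypothetical [I, O, H, W] weights
    std::vector<float> src(static_cast<std::size_t>(I) * O * H * W), dst(src.size());
    for (std::size_t i = 0; i < src.size(); ++i)
        src[i] = static_cast<float>(i);
    reorder_ncsp(src.data(), dst.data(), I, O, H, W);      // plain (ncsp) weights
    reorder_nspc(src.data(), dst.data(), I, O, H, W);      // channels-last (nspc) weights
    return 0;
}

In both cases the reordered buffer is intended to line up with the [O, I, H, W] weiTensorInfo built in getACLDeconvTensorInfo.
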
6 changes: 1 addition & 5 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.hpp
@@ -43,6 +43,7 @@ class AclDeconvExecutor : public DeconvExecutor {
DeconvAttrs deconvAttrs;
impl_desc_type implType = impl_desc_type::gemm_acl;

LayoutType weiLayoutType;
arm_compute::Tensor srcTensor;
arm_compute::Tensor weiTensor;
arm_compute::Tensor biasTensor;
@@ -67,11 +68,6 @@ class AclDeconvExecutorBuilder : public DeconvExecutorBuilder {
DeconvExecutorPtr makeExecutor(const ExecutorContext::CPtr context) const override {
return std::make_shared<AclDeconvExecutor>(context);
}

private:
static bool validate_deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
unsigned int kernel_width, unsigned int kernel_height,
const arm_compute::PadStrideInfo &pad_stride_info);
};

} // namespace intel_cpu
