new fix of dynamic shapes
allnes committed Jan 12, 2024
1 parent 4ca8f15 commit 45facf2
Showing 3 changed files with 96 additions and 73 deletions.
59 changes: 29 additions & 30 deletions src/plugins/intel_cpu/src/nodes/deconv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -474,37 +474,35 @@ void Deconvolution::getSupportedDescriptors() {
config.outConfs.resize(getOriginalOutputsNumber());

auto& creatorsMap = BlockedDescCreator::getCommonCreators();
for (size_t i = 0; i < getParentEdges().size(); ++i) {
auto checkDesc = [&](LayoutType format) -> bool {
NodeConfig config;
config.inConfs.resize(getParentEdges().size());
config.outConfs.resize(getOriginalOutputsNumber());

for (size_t i = 0; i < getParentEdges().size(); ++i) {
config.inConfs[i].setMemDesc(
creatorsMap.at(format)->createSharedDesc(getOriginalInputPrecisionAtPort(0), getInputShapeAtPort(i)));
}
auto checkDesc = [&](LayoutType format) -> bool {
NodeConfig config;
config.inConfs.resize(getParentEdges().size());
config.outConfs.resize(getOriginalOutputsNumber());

for (size_t i = 0; i < getChildEdges().size(); ++i) {
config.outConfs[i].setMemDesc(
creatorsMap.at(format)->createSharedDesc(getOriginalOutputPrecisionAtPort(0), getOutputShapeAtPort(i)));
}
for (size_t i = 0; i < getParentEdges().size(); ++i) {
config.inConfs[i].setMemDesc(
creatorsMap.at(format)->createSharedDesc(getOriginalInputPrecisionAtPort(0), getInputShapeAtPort(i)));
}

std::vector<MemoryDescPtr> srcMemoryDescs;
srcMemoryDescs.push_back(config.inConfs[0].getMemDesc()->cloneWithNewDims(inDims));
for (size_t i = 1; i < config.inConfs.size(); i++) {
srcMemoryDescs.push_back(config.inConfs[i].getMemDesc()->clone());
}
std::vector<MemoryDescPtr> dstMemoryDescs;
dstMemoryDescs.push_back(config.outConfs[0].getMemDesc()->cloneWithNewDims(outDims));
for (size_t i = 1; i < config.outConfs.size(); i++) {
dstMemoryDescs.push_back(config.outConfs[i].getMemDesc()->clone());
}
for (size_t i = 0; i < getChildEdges().size(); ++i) {
config.outConfs[i].setMemDesc(
creatorsMap.at(format)->createSharedDesc(getOriginalOutputPrecisionAtPort(0), getOutputShapeAtPort(i)));
}

return AclDeconvExecutorBuilder::customIsSupported(deconvAttrs, srcMemoryDescs, dstMemoryDescs);
};
useACL = checkDesc(LayoutType::ncsp);
}
std::vector<MemoryDescPtr> srcMemoryDescs;
srcMemoryDescs.push_back(config.inConfs[0].getMemDesc()->cloneWithNewDims(inDims));
for (size_t i = 1; i < config.inConfs.size(); i++) {
srcMemoryDescs.push_back(config.inConfs[i].getMemDesc()->clone());
}
std::vector<MemoryDescPtr> dstMemoryDescs;
dstMemoryDescs.push_back(config.outConfs[0].getMemDesc()->cloneWithNewDims(outDims));
for (size_t i = 1; i < config.outConfs.size(); i++) {
dstMemoryDescs.push_back(config.outConfs[i].getMemDesc()->clone());
}

return AclDeconvExecutorBuilder::customIsSupported(deconvAttrs, srcMemoryDescs, dstMemoryDescs);
};
useACL = checkDesc(LayoutType::nspc) || checkDesc(LayoutType::ncsp);
if (useACL) return;
#endif
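The hunk above is the heart of the fix: the support probe is hoisted out of the per-edge loop into a single checkDesc lambda that is parameterized by layout, tried channels-last (nspc) first and planar (ncsp) second, while cloneWithNewDims substitutes the concrete runtime dims into the otherwise dynamic descriptors before asking ACL whether it can handle them. A minimal standalone sketch of that probing pattern follows; the names (chooseLayout, isSupported) are illustrative stand-ins, not the OpenVINO API.

#include <functional>
#include <initializer_list>
#include <iostream>

enum class LayoutType { ncsp, nspc };  // planar (NCHW-like) vs channels-last (NHWC-like)

// Return the first layout the backend accepts, probing in preference order.
bool chooseLayout(const std::function<bool(LayoutType)>& isSupported,
                  std::initializer_list<LayoutType> preference,
                  LayoutType& chosen) {
    for (LayoutType fmt : preference) {
        if (isSupported(fmt)) {
            chosen = fmt;
            return true;
        }
    }
    return false;
}

int main() {
    // Stand-in for AclDeconvExecutorBuilder::customIsSupported: pretend the
    // backend only accepts channels-last tensors.
    auto isSupported = [](LayoutType fmt) { return fmt == LayoutType::nspc; };

    LayoutType chosen = LayoutType::ncsp;
    if (chooseLayout(isSupported, {LayoutType::nspc, LayoutType::ncsp}, chosen))
        std::cout << (chosen == LayoutType::nspc ? "nspc" : "ncsp") << "\n";
}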

@@ -875,11 +873,11 @@ void Deconvolution::prepareParams() {
     if (useACL) {
         std::vector<MemoryDescPtr> srcMemoryDescs;
         for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
-            srcMemoryDescs.push_back(getParentEdgesAtPort(i).front()->getMemory().getDescWithType<DnnlMemoryDesc>());
+            srcMemoryDescs.push_back(getParentEdgesAtPort(i).front()->getMemory().getDescPtr());
         }
         std::vector<MemoryDescPtr> dstMemoryDescs;
         for (size_t i = 0; i < getOriginalOutputsNumber(); i++) {
-            dstMemoryDescs.push_back(getChildEdgesAtPort(i).front()->getMemory().getDescWithType<DnnlMemoryDesc>());
+            dstMemoryDescs.push_back(getChildEdgesAtPort(i).front()->getMemory().getDescPtr());
         }
 
         execPtrDeconv = selected_pd->getExecutorFactoryAs<DeconvExecutorFactory>()->makeExecutor(deconvAttrs, srcMemoryDescs,
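Both loops now hand the executor factory the node's actual descriptors via getDescPtr() instead of forcing a DnnlMemoryDesc view, presumably because on the ACL path the memory is not oneDNN-described and, with dynamic shapes, the concrete descriptor type is only known at runtime. A hedged sketch of the failure mode the downcast invites, with stand-in types rather than the real OpenVINO classes:

#include <memory>
#include <vector>

struct MemoryDesc { virtual ~MemoryDesc() = default; };
struct DnnlMemoryDesc : MemoryDesc {};  // oneDNN-backed descriptor (stand-in)
struct AclMemoryDesc : MemoryDesc {};   // ACL-backed descriptor (stand-in)

using MemoryDescPtr = std::shared_ptr<MemoryDesc>;

int main() {
    std::vector<MemoryDescPtr> descs = {std::make_shared<AclMemoryDesc>()};
    // Forcing a backend-specific type breaks when the memory belongs to
    // another backend: the cast yields nullptr here.
    auto asDnnl = std::dynamic_pointer_cast<DnnlMemoryDesc>(descs[0]);
    // The base-class pointer is always usable, whatever the backend.
    MemoryDescPtr generic = descs[0];
    return (asDnnl == nullptr && generic != nullptr) ? 0 : 1;
}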
@@ -1253,6 +1251,7 @@ void Deconvolution::initSupportedPrimitiveDescriptors() {
 
         supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::gemm_acl, factory);
     };
+    pushDesc(LayoutType::nspc);
     pushDesc(LayoutType::ncsp);
 }
104 changes: 64 additions & 40 deletions src/plugins/intel_cpu/src/nodes/executors/acl/acl_deconv.cpp

@@ -15,26 +15,49 @@ ACLDeconvTensorInfo getACLDeconvTensorInfo(const DeconvAttrs& deconvAttrs,
                                            const std::vector<MemoryDescPtr>& dstDescs) {
     auto srcDims = srcDescs[0]->getShape().getDims();
     auto weiDims = srcDescs[1]->getShape().getDims();
+    // std::cout << (srcDescs[0]->hasLayoutType(LayoutType::nspc) ? "nhwc" : "nchw") << std::endl;
+    // std::cout << weiDims[0] << " | " << weiDims[1] << " | " << weiDims[2] << " | " << weiDims[3] << " | " << std::endl;
-    // swap input and output channels dimensions to be align with ACL
+    // the weights tensor shape is changed because ACL expects an [O, I, H, W] tensor while OV uses [I, O, H, W]
     std::swap(weiDims[0], weiDims[1]);
+    // std::cout << weiDims[0] << " | " << weiDims[1] << " | " << weiDims[2] << " | " << weiDims[3] << " | " << std::endl;
     auto dstDims = dstDescs[0]->getShape().getDims();
 
-    VectorDims biasDims;
-    TensorInfo biasTensorInfo;
-
+    arm_compute::TensorShape srcVecDims = shapeCast(srcDims);
+    arm_compute::TensorShape weiVecDims = shapeCast(weiDims);
+    arm_compute::TensorShape dstVecDims = shapeCast(dstDims);
+    arm_compute::TensorShape biasVecDims;
     if (deconvAttrs.withBiasesParam) {
-        biasDims = srcDescs[2]->getShape().getStaticDims();
-        biasTensorInfo = TensorInfo(shapeCast(biasDims), 1,
-            precisionToAclDataType(srcDescs[2]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[2]));
+        biasVecDims = shapeCast(srcDescs[2]->getShape().getDims());
     }
+    if (srcDescs[0]->hasLayoutType(LayoutType::nspc)) {
+        auto dim_size = srcDescs[0]->getShape().getDims().size();
+        auto mover = [&dim_size](TensorShape &_shape) {
+            if (dim_size > 4) { std::swap(_shape[2], _shape[3]); }
+            if (dim_size > 3) { std::swap(_shape[1], _shape[2]); }
+            if (dim_size > 2) { std::swap(_shape[0], _shape[1]); }
+        };
+        mover(srcVecDims);
+        mover(weiVecDims);
+        mover(dstVecDims);
+        if (deconvAttrs.withBiasesParam) {
+            mover(biasVecDims);
+        }
+    }
 
-    TensorInfo srcTensorInfo = TensorInfo(shapeCast(srcDims), 1,
+    std::cout << weiVecDims[0] << " | " << weiVecDims[1] << " | " << weiVecDims[2] << " | " << weiVecDims[3] << " | " << std::endl;
+    std::cout << weiVecDims[0] << " ======================== " << std::endl;
+    TensorInfo srcTensorInfo = TensorInfo(srcVecDims, 1,
         precisionToAclDataType(srcDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[0]));
-    TensorInfo weiTensorInfo = TensorInfo(shapeCast(weiDims), 1,
+    TensorInfo weiTensorInfo = TensorInfo(weiVecDims, 1,
         precisionToAclDataType(srcDescs[1]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[1]));
-    TensorInfo dstTensorInfo = TensorInfo(shapeCast(dstDims), 1,
+    TensorInfo dstTensorInfo = TensorInfo(dstVecDims, 1,
         precisionToAclDataType(dstDescs[0]->getPrecision()), getAclDataLayoutByMemoryDesc(dstDescs[0]));
+    TensorInfo biasTensorInfo;
+    if (deconvAttrs.withBiasesParam) {
+        biasTensorInfo = TensorInfo(biasVecDims, 1,
+            precisionToAclDataType(srcDescs[2]->getPrecision()), getAclDataLayoutByMemoryDesc(srcDescs[2]));
+    }
 
     unsigned int pad_l =
         (deconvAttrs.paddingL.size() > 1) ? static_cast<unsigned int>(deconvAttrs.paddingL.at(1)) : static_cast<unsigned int>(deconvAttrs.paddingL.at(0));
@@ -114,9 +137,37 @@ static void transpose_to_1023(const MemoryCPtr& srcMemPtr, std::vector<float>& dst_data) {
     });
 }
 
+//static void transpose_to_0231(VectorDims new_dims, std::vector<float>& dst_data) {
+//    const unsigned long DIM0 = new_dims[0];
+//    const unsigned long DIM1 = new_dims[1];
+//    const unsigned long DIM2 = new_dims[2];
+//    const unsigned long DIM3 = new_dims[3];
+//
+//    parallel_for3d(DIM0, DIM1, DIM2, [&](unsigned long dim0, unsigned long dim1, unsigned long dim2) {
+//        for (int dim3 = 0; dim3 < DIM3; ++dim3) {
+//            unsigned long src_off = dim0 * DIM1 * DIM2 * DIM3 +
+//                                    dim1 * DIM2 * DIM3 +
+//                                    dim2 * DIM3 +
+//                                    dim3;
+//            unsigned long dst_off = dim0 * DIM2 * DIM3 * DIM1 +
+//                                    dim2 * DIM3 * DIM1 +
+//                                    dim3 * DIM1 +
+//                                    dim1;
+//
+//            std::swap(dst_data[dst_off], dst_data[src_off]);
+//        }
+//    });
+//}
+
 void AclDeconvExecutor::exec(const std::vector<MemoryCPtr>& src, const std::vector<MemoryPtr>& dst, const void *post_ops_data_) {
     // TODO: Remove transpose from exec
     transpose_to_1023(src[1], weiBuffer);
+    // std::cout << src[1]->getShape().getDims()[0] << " * " <<
+    //              src[1]->getShape().getDims()[1] << " * " <<
+    //              src[1]->getShape().getDims()[2] << " * " <<
+    //              src[1]->getShape().getDims()[3] << " * " << std::endl;
+    // VectorDims vec_dims = {12, 6, 3, 3};
+    // transpose_to_0231(vec_dims, weiBuffer);
 
     srcTensor.allocator()->import_memory(src[0]->getData());
     dstTensor.allocator()->import_memory(dst[0]->getData());
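transpose_to_1023 re-lays the weight tensor at execution time: the name encodes the output axis order {1, 0, 2, 3}, so the first two dimensions are swapped and an OV [I, O, H, W] weight block becomes the [O, I, H, W] block ACL expects (the TODO above notes the copy should eventually move out of exec). A minimal single-threaded rendition follows, with plain vectors in place of the real helper's MemoryCPtr input and parallel_for3d loop; the function name and the tiny example values are illustrative.

#include <cstddef>
#include <vector>

// Permute axes {1, 0, 2, 3} of a dense D0 x D1 x D2 x D3 block.
std::vector<float> transpose_1023(const std::vector<float>& src,
                                  std::size_t D0, std::size_t D1,
                                  std::size_t D2, std::size_t D3) {
    std::vector<float> dst(src.size());
    for (std::size_t d0 = 0; d0 < D0; ++d0)
        for (std::size_t d1 = 0; d1 < D1; ++d1)
            for (std::size_t d2 = 0; d2 < D2; ++d2)
                for (std::size_t d3 = 0; d3 < D3; ++d3) {
                    const std::size_t src_off = ((d0 * D1 + d1) * D2 + d2) * D3 + d3;
                    const std::size_t dst_off = ((d1 * D0 + d0) * D2 + d2) * D3 + d3;
                    dst[dst_off] = src[src_off];
                }
    return dst;
}

int main() {
    // A 2x3x1x1 block, values 0..5 laid out as [I=2][O=3]; after the
    // transpose, element (i, o) lands at offset o * 2 + i.
    std::vector<float> w = {0, 1, 2, 3, 4, 5};
    std::vector<float> t = transpose_1023(w, 2, 3, 1, 1);  // -> {0, 3, 1, 4, 2, 5}
    return t[1] == 3.0f ? 0 : 1;
}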
@@ -193,28 +244,17 @@ bool AclDeconvExecutorBuilder::customIsSupported(const DeconvAttrs &deconvAttrs,
     TensorInfo dstTensorInfo = aclDeconvTensorInfo.dstTensorInfo;
     PadStrideInfo deconv_info = aclDeconvTensorInfo.deconv_info;
 
-    unsigned int kernel_x = (deconvAttrs.kernel.size() > 1) ? deconvAttrs.kernel.at(1) : deconvAttrs.kernel.at(0);
-    unsigned int kernel_y = deconvAttrs.kernel.at(0);
-
     // With stride >= 8, up-sampling in the ACL Deconvolution layer is slower than the reference implementation
-    if (deconv_info.stride().first >= 8 || deconv_info.stride().second >= 8) return false;
+    if (deconv_info.stride().first >= 8 || deconv_info.stride().second >= 8) {
+        DEBUG_LOG("AclDeconvExecutor does not support strides >= 8");
+        return false;
+    }
 
     unsigned int dilation_x = (deconvAttrs.dilation.size() > 1) ? deconvAttrs.dilation.at(1) : deconvAttrs.dilation.at(0);
     unsigned int dilation_y = deconvAttrs.dilation.at(0);
     if (!one_of(dilation_x, static_cast<unsigned int>(0), static_cast<unsigned int>(1)) ||
         !one_of(dilation_y, static_cast<unsigned int>(0), static_cast<unsigned int>(1))) return false;
 
-    size_t in_h = srcDescs[0]->hasLayoutType(LayoutType::ncsp) ? srcDescs[0]->getShape().getDims()[2] : srcDescs[0]->getShape().getDims()[1];
-    size_t in_w = srcDescs[0]->hasLayoutType(LayoutType::ncsp) ? srcDescs[0]->getShape().getDims()[3] : srcDescs[0]->getShape().getDims()[2];
-
-    // The Validate function has a bug (https://github.com/ARM-software/ComputeLibrary/issues/1061) with an error exception.
-    // We copy the deconvolution_output_dimensions function to get correct validation
-    // TODO: remove after fix
-    if (validate_deconvolution_output_dimensions(in_w, in_h, kernel_x, kernel_y, deconv_info)) {
-        DEBUG_LOG("NEDeconvolutionLayer arm_compute::deconvolution_output_dimensions failed");
-        return false;
-    }
-
     arm_compute::Status status = arm_compute::NEDeconvolutionLayer::validate(&srcTensorInfo,
                                                                              &weiTensorInfo,
                                                                              deconvAttrs.withBiasesParam ? &biasTensorInfo : nullptr,
@@ -228,21 +268,5 @@ bool AclDeconvExecutorBuilder::customIsSupported(const DeconvAttrs &deconvAttrs,
     return true;
 }
 
-bool AclDeconvExecutorBuilder::validate_deconvolution_output_dimensions(unsigned int in_width, unsigned int in_height,
-                                                                        unsigned int kernel_width,
-                                                                        unsigned int kernel_height,
-                                                                        const PadStrideInfo &pad_stride_info) {
-    const unsigned int pad_left   = pad_stride_info.pad_left();
-    const unsigned int pad_top    = pad_stride_info.pad_top();
-    const unsigned int pad_right  = pad_stride_info.pad_right();
-    const unsigned int pad_bottom = pad_stride_info.pad_bottom();
-    const unsigned int stride_x   = pad_stride_info.stride().first;
-    const unsigned int stride_y   = pad_stride_info.stride().second;
-
-    if (!((in_width < 1 || in_height < 1) ||
-          (((in_width - 1) * stride_x + kernel_width) < (pad_left + pad_right)) ||
-          (((in_height - 1) * stride_y + kernel_height) < (pad_top + pad_bottom)))) { return false; }
-    return true;
-}
 } // namespace intel_cpu
 } // namespace ov
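With the copied validate_deconvolution_output_dimensions workaround deleted (it existed only to dodge the ComputeLibrary issue cited above), customIsSupported reduces to a common capability-gating pattern: cheap attribute checks first, then the library's own validator as the final word. A hedged sketch of that shape, with stand-in types instead of arm_compute; the thresholds mirror the diff, but backendValidate is a placeholder for NEDeconvolutionLayer::validate, not its real logic.

#include <cstdio>

struct Status { bool ok; const char* msg; };  // stand-in for arm_compute::Status

// Placeholder for arm_compute::NEDeconvolutionLayer::validate(...).
static Status backendValidate(unsigned stride_x, unsigned stride_y) {
    return {stride_x < 8 && stride_y < 8, "unsupported stride"};
}

static bool isSupported(unsigned stride_x, unsigned stride_y, unsigned dilation) {
    // Heuristic cut-off from the diff: past stride 8 the ACL path is slower
    // than the reference kernel, so reject it before calling the library.
    if (stride_x >= 8 || stride_y >= 8) return false;
    // Mirrors the one_of(dilation, 0, 1) check kept in the diff.
    if (dilation > 1) return false;
    const Status s = backendValidate(stride_x, stride_y);
    if (!s.ok) {
        std::fprintf(stderr, "validate failed: %s\n", s.msg);
        return false;
    }
    return true;
}

int main() { return isSupported(2, 2, 1) ? 0 : 1; }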
6 changes: 3 additions & 3 deletions in the DeconvolutionLayerCPUTest suite

@@ -66,7 +66,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_Planar_FP32,
                          ::testing::ValuesIn(Planar_2D_inputs_smoke),
                          ::testing::Values(ElementType::f32),
                          ::testing::ValuesIn(fusingParamsSet),
-                         ::testing::ValuesIn(filterCPUInfo({conv_gemm_2D, conv_gemm_2D_acl})),
+                         ::testing::ValuesIn(filterCPUInfo({conv_gemm_2D, conv_gemm_2D_acl, conv_gemm_acl_2D_nspc})),
                          ::testing::Values(CPUTestUtils::empty_plugin_config)),
                          DeconvolutionLayerCPUTest::getTestCaseName);

@@ -76,7 +76,7 @@ INSTANTIATE_TEST_SUITE_P(nightly_Deconv_2D_Planar_FP32,
                          ::testing::ValuesIn(Planar_2D_inputs_nightly),
                          ::testing::Values(ElementType::f32),
                          ::testing::ValuesIn(fusingParamsSet),
-                         ::testing::ValuesIn(filterCPUInfo({conv_gemm_2D, conv_gemm_2D_acl})),
+                         ::testing::ValuesIn(filterCPUInfo({conv_gemm_2D, conv_gemm_2D_acl, conv_gemm_acl_2D_nspc})),
                          ::testing::Values(CPUTestUtils::empty_plugin_config)),
                          DeconvolutionLayerCPUTest::getTestCaseName);

@@ -109,7 +109,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Deconv_2D_AutoPadding_FP32,
                          ::testing::ValuesIn(inputs_2D_AutoPadding),
                          ::testing::Values(ElementType::f32),
                          ::testing::Values(emptyFusingSpec),
-                         ::testing::ValuesIn(filterCPUInfo({conv_gemm_2D, conv_gemm_2D_acl, conv_avx512_2D})),
+                         ::testing::ValuesIn(filterCPUInfo({conv_gemm_2D, conv_gemm_2D_acl, conv_gemm_acl_2D_nspc, conv_avx512_2D})),
                          ::testing::Values(CPUTestUtils::empty_plugin_config)),
                          DeconvolutionLayerCPUTest::getTestCaseName);
