Skip to content

Commit

Permalink
[CPU]The shape of the memory descriptor is considered in selectPrefer…
Browse files Browse the repository at this point in the history
…PrimitiveDescriptor of Subgraph (#23971)

### Details:
- *selectPreferPrimitiveDescriptor now takes the shapes of the
memory descriptors into account, because a reorder on a node with a
scalar-like shape requires less computation*
 - *The new logic only applies to Subgraph nodes*

### Tickets:
 - *137307*
 - *139904*

---------

Signed-off-by: xipingya <[email protected]>
Signed-off-by: Yan <[email protected]>
  • Loading branch information
xipingyan authored Sep 26, 2024
1 parent aece8c6 commit 507f31d
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 2 deletions.
121 changes: 120 additions & 1 deletion src/plugins/intel_cpu/src/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& pr
auto parentDesc = parent_spd->getConfig().outConfs[inNum].getMemDesc();

const bool isCompatible = curDesc->isCompatible(*parentDesc);

if (isCompatible) {
equalsLocalFormatCount++;
}
Expand Down Expand Up @@ -316,6 +315,126 @@ void Node::selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& pr
selectPrimitiveDescriptorByIndex(0);
}

/**
 * @brief Checks whether at most one dimension of the shape differs from a static 1.
 *
 * A dimension counts as "unit" only when it is static and its length is exactly 1;
 * dynamic dimensions are therefore treated as non-unit. A shape without any
 * dimensions satisfies the predicate.
 *
 * @param pshape Shape to inspect.
 * @return true if no more than one dimension is non-unit, false otherwise.
 */
bool Node::isOneDimShape(const ov::PartialShape& pshape) {
    int nonUnitDims = 0;
    for (const auto& dim : pshape) {
        const bool isUnit = dim.is_static() && dim.get_length() == 1;
        if (!isUnit) {
            ++nonUnitDims;
        }
    }
    // Equivalent to "number of unit dimensions >= rank - 1".
    return nonUnitDims <= 1;
}

/**
 * @brief Tells whether converting between two memory descriptors is treated as a real reorder.
 *
 * The conversion is considered "not required" only when both descriptors have the same
 * precision and both shapes satisfy isOneDimShape().
 * NOTE(review): presumably because such tensors have the same memory order in any layout,
 * so the reorder is (almost) free — confirm against the reorder implementation.
 *
 * @param desc1 First memory descriptor.
 * @param desc2 Second memory descriptor.
 * @return false when the precisions match and both shapes are one-dimensional in the sense
 *         of isOneDimShape(); true otherwise.
 */
bool Node::isReorderRequired(ov::intel_cpu::MemoryDescPtr desc1, ov::intel_cpu::MemoryDescPtr desc2) {
    const bool precisionMatches = desc1->getPrecision() == desc2->getPrecision();
    const bool firstIsOneDim = isOneDimShape(desc1->getShape().toPartialShape());
    const bool secondIsOneDim = isOneDimShape(desc2->getShape().toPartialShape());

    const bool bothOneDim = firstIsOneDim && secondIsOneDim;
    if (precisionMatches && bothOneDim) {
        return false;
    }
    return true;
}

/**
 * @brief Selects a primitive descriptor, taking the shapes of the input memory descriptors
 *        into account when estimating the cost of the reorders a descriptor would need.
 *
 * Dynamic nodes are delegated to selectPreferPrimitiveDescriptor(). Otherwise the
 * implementation types in @p priority are tried in order; for the first type that has at
 * least one supported descriptor, the descriptor with the smallest estimated reorder
 * overhead is selected (the first one wins on ties). If no type from @p priority is
 * supported, descriptor 0 is selected.
 *
 * The overhead of a descriptor is the sum over its inputs whose descriptor is not compatible
 * with the parent's selected output descriptor of:
 *  - 1, when isReorderRequired() reports that no real reorder is needed;
 *  - the number of elements of the input shape otherwise.
 *
 * @param priority          Implementation types in decreasing order of preference.
 * @param ignoreConstInputs When true, constant parents on input ports other than port 0 do
 *                          not contribute to the estimate.
 * @throws Raises via OPENVINO_THROW when a descriptor has more input ports than the node has
 *         parent edges, and via OPENVINO_ASSERT when the node has no supported descriptors
 *         and nothing from @p priority matched.
 */
void Node::selectPreferPrimitiveDescriptorWithShape(const std::vector<impl_desc_type>& priority, bool ignoreConstInputs) {
    // The estimate is computed from concrete dimensions, so dynamic nodes use the shape-agnostic selection.
    if (isDynamic) {
        selectPreferPrimitiveDescriptor(priority, ignoreConstInputs);
        return;
    }

    const auto& supportedDescs = getSupportedPrimitiveDescriptors();

    // The estimate is kept in size_t: ov::shape_size() yields an element count that does not
    // necessarily fit into int, and a narrowed/overflowed value could win the comparison below.
    auto estimateReorderOverhead = [&](const ov::intel_cpu::NodeDesc& supportedPrimitiveDesc, size_t i) {
        size_t estimate = 0;
        const auto inputNodesNum = supportedPrimitiveDesc.getConfig().inConfs.size();
        for (size_t j = 0; j < inputNodesNum; j++) {
            auto parentEdge = getParentEdgeAt(j);
            auto parentPtr = parentEdge->getParent();

            // We don't take into account constant edges since reorders on them will be executed on load network
            // stage
            if (ignoreConstInputs && j > 0 && parentPtr->isConstant()) {
                continue;
            }

            auto parent_spd = parentPtr->getSelectedPrimitiveDescriptor();
            if (parent_spd != nullptr && !parent_spd->getConfig().outConfs.empty()) {
                int inNum = parentEdge->getInputNum();
                // Fall back to the first output port when the edge reports a port outside the parent's config.
                if (inNum < 0 || inNum >= static_cast<int>(parent_spd->getConfig().outConfs.size())) {
                    inNum = 0;
                }
                auto curDesc = supportedPrimitiveDesc.getConfig().inConfs[j].getMemDesc();
                auto parentDesc = parent_spd->getConfig().outConfs[inNum].getMemDesc();

                const bool isCompatible = curDesc->isCompatible(*parentDesc);
                if (!isCompatible) {
                    if (!isReorderRequired(parentDesc, curDesc)) {
                        // Nominal cost: the descriptors differ, but no real reorder is expected.
                        estimate += 1;
                    } else {
                        // A real reorder touches every element of the input.
                        estimate += ov::shape_size<ov::intel_cpu::VectorDims>(curDesc->getShape().getMinDims());
                    }
                }

                DEBUG_LOG(getName(), " pd[", i, "].inConfs[", j, "]"
                          " is ", (isCompatible ? "compatible" : "not compatible"),
                          " shape is ", (isOneDimShape(curDesc->getShape().toPartialShape()) ? "one dim shape" : "not one dim shape"),
                          " with parent ", parentPtr->getName(),
                          " outConfs[", inNum, "], estimate add to ", estimate);
            }
        }
        return estimate;
    };

    // Returns the index of the cheapest supported descriptor of the given type, or -1 if there is none.
    auto selectSPDwithType = [&](const impl_desc_type type) {
        int selectedPrimitive = -1;
        size_t bestEstimate = std::numeric_limits<size_t>::max();
        for (size_t i = 0; i < supportedDescs.size(); i++) {
            const auto& supportedPrimitiveDesc = supportedDescs[i];
            const impl_desc_type supportedType = supportedPrimitiveDesc.getImplementationType();
            if (supportedType != type) {
                continue;
            }

            const size_t descInConfSize = supportedPrimitiveDesc.getConfig().inConfs.size();

            if (descInConfSize > getParentEdges().size()) {
                OPENVINO_THROW(getName(),
                               " Desc ",
                               i,
                               " with type: ",
                               supportedType,
                               " has more input ports than node: ",
                               descInConfSize,
                               " vs ",
                               getParentEdges().size());
            }

            const size_t estimate = estimateReorderOverhead(supportedPrimitiveDesc, i);

            // Strict comparison keeps the first descriptor among equally cheap ones.
            if (estimate < bestEstimate) {
                bestEstimate = estimate;
                selectedPrimitive = static_cast<int>(i);
                DEBUG_LOG(getName(), " Select primitive desc: ", i, " ", supportedPrimitiveDesc);
            }
        }
        return selectedPrimitive;
    };

    // loop kernel priority
    for (const auto& type : priority) {
        const int selectedPrimitive = selectSPDwithType(type);
        if (selectedPrimitive >= 0) {
            selectPrimitiveDescriptorByIndex(selectedPrimitive);
            return;
        }
    }

    OPENVINO_ASSERT(!supportedDescs.empty(),
                    "Supported primitive descriptors list is empty for node: ",
                    getName(),
                    " type: ",
                    NameFromType(getType()));

    // fallback. If there are no primitives from priority list just select a first
    selectPrimitiveDescriptorByIndex(0);
}

bool Node::canBeInPlace() const {
// TODO [DS]: enable inPlace for dynamic shapes
if (isDynamicNode()) {
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/node.h
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,9 @@ class Node {
friend class GraphOptimizer;

void selectPreferPrimitiveDescriptor(const std::vector<impl_desc_type>& priority, bool ignoreConstInputs);
// Variant of selectPreferPrimitiveDescriptor that weighs the input shapes when estimating
// the reorder overhead of each supported descriptor; dynamic nodes fall back to
// selectPreferPrimitiveDescriptor.
void selectPreferPrimitiveDescriptorWithShape(const std::vector<impl_desc_type>& priority, bool ignoreConstInputs);
// Returns true when at most one dimension of pshape is not a static 1.
bool isOneDimShape(const ov::PartialShape& pshape);
// Returns false only when both descriptors have the same precision and both shapes
// satisfy isOneDimShape; returns true otherwise.
bool isReorderRequired(ov::intel_cpu::MemoryDescPtr desc1, ov::intel_cpu::MemoryDescPtr desc2);
bool isConfigDefined(const NodeConfig &config) const;
virtual bool canBeInPlace() const;

Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/nodes/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ void Subgraph::initSupportedPrimitiveDescriptors() {
}

// Chooses the primitive descriptor for the Subgraph node, ignoring constant inputs
// (second argument is true).
// NOTE(review): this is a rendered diff hunk reported as "1 addition & 1 deletion", so the
// two calls below are presumably the removed line followed by its replacement; only the
// ...WithShape call is expected to remain in the resulting source — confirm against the repository.
void Subgraph::selectOptimalPrimitiveDescriptor() {
selectPreferPrimitiveDescriptor(getImplPriority(), true);
selectPreferPrimitiveDescriptorWithShape(getImplPriority(), true);
}

ov::element::Type Subgraph::getRuntimePrecision() const {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "common_test_utils/node_builders/constant.hpp"
#include "openvino/opsets/opset8.hpp"
#include "shared_test_classes/base/ov_subgraph.hpp"
#include "utils/cpu_test_utils.hpp"

namespace ov {
namespace test {

/*
input1(f32_abcd_{1,64,32,32}) input2(f16_abcd_{1,128,1,1})
| |
Reorder(f32_acdb_{1,64,32,32}) const Convert(f32_abcd_{1,128,1,1})
| / |
| / |
Convolution(f32_acdb_{1,1,30,30}) Range_1520 VariadicSplit(f32_abcd_{1,64,1,1}, f32_abcd_{1,64,1,1})
| / \ /
| / \ /
| / \ /
| / \ /
MVN(f32_acdb_{1,1,30,30}) Reorder1(f32_acdb_{1,64,1,1}) Reorder2(f32_acdb_{1,64,1,1})
\ / /
\ / /
\ / /
\ / /
Subgraph(f32_acdb_{1,64,30,30})
|
|
Convolution(f32_acdb_{1,1,28,28})
|
Result
The Subgraph node has 3 inputs, and they do not all share the same layout.
Expected: Reorders are inserted after VariadicSplit[0] and VariadicSplit[1], and no Reorder is inserted after MVN.
This is because VariadicSplit's outputs have a scalar-like shape ([1,64,1,1]), so reordering them requires far less computation.
*/

/**
 * Test fixture that builds a model in which an eltwise chain (PRelu/Multiply/Add) is fed by
 * a convolution+MVN branch and by the two outputs of a VariadicSplit applied to a
 * {1, 32, 1, 1} input, and then checks the output layout of every "Subgraph" node found in
 * the runtime model.
 */
class SubgraphSelectPD : virtual public SubgraphBaseStaticTest {
protected:
    /// Builds the model under test and stores it in `function`.
    void SetUp() override {
        targetDevice = ov::test::utils::DEVICE_CPU;
        abs_threshold = 2e-2;

        auto type = element::f32;
        constexpr int const1 = 32;
        auto input1 = std::make_shared<ov::opset8::Parameter>(type, Shape{1, const1 / 2, 8, 8});
        input1->set_friendly_name("input1");
        auto input2 = std::make_shared<ov::opset8::Parameter>(type, Shape{1, const1, 1, 1});
        input2->set_friendly_name("input2");

        // Split input2 along axis 1 into two equal halves.
        auto variadicSplit = std::make_shared<ov::op::v1::VariadicSplit>(
            input2,
            ov::opset8::Constant::create(element::i64, Shape{1}, {1}),
            ov::opset8::Constant::create(element::i64, Shape{2}, {const1 / 2, const1 / 2}));
        variadicSplit->set_friendly_name("variadicSplit");

        auto add1 = std::make_shared<ov::opset8::Add>(variadicSplit->output(0),
                                                      ov::opset8::Constant::create(type, Shape{1}, {0}));
        add1->set_friendly_name("add1");

        // MVN axes are computed at runtime as Range(2, rank(input1), 1).
        auto shapeof = std::make_shared<ov::opset8::ShapeOf>(input1);
        auto rankof = std::make_shared<ov::opset8::ShapeOf>(shapeof);
        auto squeeze =
            std::make_shared<ov::opset8::Squeeze>(rankof, ov::opset8::Constant::create(element::i64, Shape{1}, {0}));

        auto range = std::make_shared<ov::opset8::Range>(ov::opset8::Constant::create(element::i64, Shape{}, {2}),
                                                         squeeze,
                                                         ov::opset8::Constant::create(element::i64, Shape{}, {1}),
                                                         ov::element::i64);

        // Convolution with generated {1, const1 / 2, 3, 3} weights; its rt_info requests nhwc
        // for both input and output.
        auto create_conv = [&](const std::shared_ptr<ov::Node>& input_node) {
            auto conv = std::make_shared<ov::opset8::Convolution>(
                input_node,
                ov::test::utils::make_constant(type, Shape{1, const1 / 2u, 3, 3}, ov::test::utils::InputGenerateData(0, 1)),
                Strides{1, 1},
                CoordinateDiff{1, 1},
                CoordinateDiff{1, 1},
                Strides{1, 1});
            conv->get_rt_info() =
                CPUTestUtils::CPUTestsBase::makeCPUInfo({CPUTestUtils::nhwc}, {CPUTestUtils::nhwc}, {});
            return conv;
        };
        // Note: despite the name, this creates a PRelu with a constant slope of 1.
        auto create_relu = [&](const std::shared_ptr<ov::Node>& input_node) {
            return std::make_shared<ov::opset8::PRelu>(input_node,
                                                       ov::opset8::Constant::create(element::f32, Shape{1}, {1}));
        };
        auto conv1 = create_conv(input1);
        auto mvn =
            std::make_shared<ov::opset8::MVN>(create_relu(conv1), range, false, 0.1, op::MVNEpsMode::INSIDE_SQRT);
        auto mul = std::make_shared<ov::opset8::Multiply>(create_relu(add1), mvn);
        auto add2 = std::make_shared<ov::opset8::Add>(variadicSplit->output(1), mul);
        auto conv2 = create_conv(create_relu(add2));
        conv2->set_friendly_name("conv2");

        function = std::make_shared<ov::Model>(conv2, ParameterVector{input1, input2});
    }

    /// Verifies that the runtime model contains at least one Subgraph node and that each of
    /// them reports the "acdb" output layout.
    void TearDown() override {
        auto runtime_function = compiledModel.get_runtime_model();
        int nodes_found = 0;
        for (const auto& n : runtime_function->get_ordered_ops()) {
            auto layer_type = n->get_rt_info().at(ov::exec_model_info::LAYER_TYPE).as<std::string>();
            if (layer_type == "Subgraph") {
                nodes_found++;
                auto output_layout = n->get_rt_info().at(ov::exec_model_info::OUTPUT_LAYOUTS).as<std::string>();
                // The optimal choice is the 'nhwc' layout, which is reported as "acdb".
                ASSERT_EQ(output_layout, "acdb");
            }
        }
        ASSERT_GT(nodes_found, 0);
    }
};

// Runs the fixture's model via run(); the Subgraph output-layout expectations are checked in
// SubgraphSelectPD::TearDown().
// NOTE(review): run() presumably also compares the results with the reference
// implementation, as the test name suggests — confirm against SubgraphBaseTest.
TEST_F(SubgraphSelectPD, smoke_CompareWithRefs) {
run();
}

} // namespace test
} // namespace ov

0 comments on commit 507f31d

Please sign in to comment.