Skip to content

Commit

Permalink
[CPU]whisper readvalue optimize (#26130)
Browse files Browse the repository at this point in the history
### Details:
 - *New `ReadValueWithSubgraph` node.* 
- *Move `ReadValue`'s initial subgraph nodes to `ReadValueWithSubgraph`*
 - *Mirror `ReadValueWithSubgraph` to `MemoryInput`*
- *Upgrade MemoryInput and MemoryInputBase so that they support
multiple inputs*
- *Call the new `Init` and `Activate` interfaces of ov::intel_cpu::Graph
to avoid memory copies. Refer:
#25385*
 - *Depends on #27189*

### Tickets:
 - *128743*

---------

Signed-off-by: xipingya <[email protected]>
Co-authored-by: Egor Duplensky <[email protected]>
Co-authored-by: Maksim Kutakov <[email protected]>
Co-authored-by: Maksim Kutakov <[email protected]>
  • Loading branch information
4 people authored Dec 24, 2024
1 parent af7a091 commit 416bd98
Show file tree
Hide file tree
Showing 16 changed files with 1,165 additions and 50 deletions.
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/cpu_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ static const TypeToNameMap& get_type_to_name_tbl() {
{"Loop", Type::TensorIterator},
{"ReadValue", Type::MemoryInput}, // for construction from name ctor, arbitrary name is used
{"Assign", Type::MemoryOutput}, // for construction from layer ctor
{"ReadValueWithSubgraph", Type::MemoryInput},
{"Convert", Type::Convert},
{"NV12toRGB", Type::ColorConvert},
{"NV12toBGR", Type::ColorConvert},
Expand Down
2 changes: 2 additions & 0 deletions src/plugins/intel_cpu/src/extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "transformations/cpu_opset/common/op/leaky_relu.hpp"
#include "transformations/cpu_opset/common/op/ngram.hpp"
#include "transformations/cpu_opset/common/op/power_static.hpp"
#include "transformations/cpu_opset/common/op/read_value_with_subgraph.hpp"
#include "transformations/cpu_opset/common/op/sdpa.hpp"
#include "transformations/cpu_opset/common/op/swish_cpu.hpp"
#include "transformations/cpu_opset/x64/op/interaction.hpp"
Expand Down Expand Up @@ -78,6 +79,7 @@ class TypeRelaxedExtension : public ov::OpExtension<ov::op::TypeRelaxed<Op>> {
OP_EXTENSION(ov::intel_cpu::SwishNode) \
OP_EXTENSION(ov::intel_cpu::SDPAWithTransposeReshape) \
OP_EXTENSION(ov::intel_cpu::NgramNode) \
OP_EXTENSION(ov::intel_cpu::ReadValueWithSubgraph) \
OP_EXTENSION(ov::op::internal::GatherCompressed) \
OP_EXTENSION(ov::op::internal::NonMaxSuppressionIEInternal) \
OP_EXTENSION(ov::op::internal::MulticlassNmsIEInternal) \
Expand Down
4 changes: 4 additions & 0 deletions src/plugins/intel_cpu/src/graph_dumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,10 @@ void average_counters(const Graph& graph) {
* - <nesting-level>_<graph-name>.csv
* For example: 0_MyModel.csv
*/
if (!graph.getGraphContext()) {
DEBUG_LOG("graph.m_context is null. Don't dump average_counters.");
return;
}

const std::string& path = graph.getConfig().debugCaps.averageCountersPath;

Expand Down
64 changes: 44 additions & 20 deletions src/plugins/intel_cpu/src/graph_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2935,12 +2935,19 @@ void GraphOptimizer::MatchSdpaKvCache(Graph& graph) {
auto memInputNode = std::dynamic_pointer_cast<node::MemoryInputBase>(node);
OPENVINO_ASSERT(memInputNode, "MemoryInput node ", node->getName(), " has unexpected dynamic type");

ov::optional<Shape> input_shape;
ov::optional<ov::element::Type> input_prc;

ov::optional<std::vector<Shape>> inputShapes;
ov::optional<std::vector<ov::element::Type>> inputPrcs;
if (!node->getParentEdges().empty()) {
input_shape = ov::optional<Shape>(node->getInputShapeAtPort(0));
input_prc = ov::optional<ov::element::Type>(node->getOriginalInputPrecisionAtPort(0));
inputShapes = ov::optional<std::vector<Shape>>(std::vector<Shape>{});
inputPrcs = ov::optional<std::vector<ov::element::Type>>(std::vector<ov::element::Type>{});

auto& input_shape_vec = *inputShapes;
auto& input_prc_vec = *inputPrcs;

for (size_t i = 0; i < node->getParentEdges().size(); i++) {
input_shape_vec.push_back(node->getInputShapeAtPort(i));
input_prc_vec.push_back(node->getOriginalInputPrecisionAtPort(i));
}
}

// search for SDPA
Expand All @@ -2966,8 +2973,8 @@ void GraphOptimizer::MatchSdpaKvCache(Graph& graph) {
memInputNode->getOutputShapeAtPort(0),
memInputNode->getOriginalOutputPrecisionAtPort(0),
graph.getGraphContext(),
input_shape,
input_prc,
inputShapes,
inputPrcs,
sdpa);

if (!memInputNode->getParentEdges().empty()) {
Expand Down Expand Up @@ -3064,12 +3071,18 @@ void GraphOptimizer::DropRedundantMemoryOutput(Graph& graph) {
auto memInputNode = std::dynamic_pointer_cast<node::MemoryInputBase>(node);
OPENVINO_ASSERT(memInputNode, "MemoryInput node ", node->getName(), " has unexpected dynamic type");

ov::optional<Shape> inputShape;
ov::optional<ov::element::Type> inputPrc;

ov::optional<std::vector<Shape>> inputShapes;
ov::optional<std::vector<ov::element::Type>> inputPrcs;
if (!node->getParentEdges().empty()) {
inputShape = ov::optional<Shape>(node->getInputShapeAtPort(0));
inputPrc = ov::optional<ov::element::Type>(node->getOriginalInputPrecisionAtPort(0));
inputShapes = ov::optional<std::vector<Shape>>(std::vector<Shape>{});
inputPrcs = ov::optional<std::vector<ov::element::Type>>(std::vector<ov::element::Type>{});

auto& input_shape_vec = *inputShapes;
auto& input_prc_vec = *inputPrcs;
for (size_t i = 0; i < node->getParentEdges().size(); i++) {
input_shape_vec.push_back(node->getInputShapeAtPort(i));
input_prc_vec.push_back(node->getOriginalInputPrecisionAtPort(i));
}
}

// search for the MemoryOutputNode
Expand All @@ -3086,24 +3099,35 @@ void GraphOptimizer::DropRedundantMemoryOutput(Graph& graph) {
graph.RemoveEdge(memoryOutputNode->getParentEdgeAt(0));
// there are no output edges from MemoryOutput nodes

CPU_GRAPH_OPTIMIZER_SCOPE(DropRedundantMemoryOutput_SubGraph);
auto memInpNd = std::dynamic_pointer_cast<node::MemoryInput>(node);
OPENVINO_ASSERT(memInpNd, "MemoryInput node ", node->getName(), " has unexpected dynamic type");

// now replace the existing MemoryInput with a special type that works without the corresponding MemoryOutput
auto memInputSingle = std::make_shared<MemoryInputSingle>(memInputNode->getId(),
memInputNode->getName(),
memInputNode->getTypeStr(),
memInputNode->getOutputShapeAtPort(0),
memInputNode->getOriginalOutputPrecisionAtPort(0),
graph.getGraphContext(),
inputShape,
inputPrc);

inputShapes,
inputPrcs,
memInpNd->getSubGraph());
graph.AddNode(memInputSingle);

if (!memInputNode->getParentEdges().empty()) {
auto parentEdge = memInputNode->getParentEdgeAt(0);
auto parent = parentEdge->getParent();
const auto inputNum = parentEdge->getInputNum();
graph.RemoveEdge(parentEdge);
graph.CreateEdge(parent, memInputSingle, inputNum, 0);
auto parentEdgeNum = memInputNode->getParentEdges().size();
std::vector<ov::intel_cpu::EdgePtr> parentEdges;
for (size_t i = 0; i < parentEdgeNum; i++) {
auto parentEdge = memInputNode->getParentEdgeAt(i);
auto parent = parentEdge->getParent();
const auto inputNum = parentEdge->getInputNum();
parentEdges.push_back(parentEdge);
graph.CreateEdge(parent, memInputSingle, inputNum, parentEdge->getOutputNum());
}
for (auto parentEdge : parentEdges) {
graph.RemoveEdge(parentEdge);
}
}

for (auto&& edge : memInputNode->getChildEdgesAtPort(0)) {
Expand Down
10 changes: 8 additions & 2 deletions src/plugins/intel_cpu/src/nodes/input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "openvino/core/shape.hpp"
#include "openvino/core/type/element_type.hpp"
#include "shape_inference/shape_inference_pass_through.hpp"
#include "transformations/cpu_opset/common/op/read_value_with_subgraph.hpp"

using namespace dnnl;
using namespace dnnl::impl::cpu::x64;
Expand Down Expand Up @@ -226,7 +227,8 @@ Input::Input(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr conte
op::v0::Constant::get_type_info_static(),
op::v0::Result::get_type_info_static(),
op::v3::ReadValue::get_type_info_static(),
op::v6::ReadValue::get_type_info_static()))
op::v6::ReadValue::get_type_info_static(),
ov::intel_cpu::ReadValueWithSubgraph::get_type_info_static()))
OPENVINO_THROW_NOT_IMPLEMENTED("CPU Input node doesn't support ngraph operation ",
op->get_type_name(),
" with name ",
Expand Down Expand Up @@ -479,7 +481,11 @@ void Input::selectOptimalPrimitiveDescriptor() {
supportedPrimitiveDescriptors.clear();

// and just use parent memory descriptor for Output node to avoid reorders insertion
NodeConfig config({PortConfig(getParentOutputMemDesc(getParentEdgeAt(0)), BlockedMemoryDesc::FULL_MASK, 0)}, {});
std::vector<PortConfig> inConfs;
for (size_t i = 0; i < getParentEdges().size(); i++) {
inConfs.push_back({PortConfig(getParentOutputMemDesc(getParentEdgeAt(i)), BlockedMemoryDesc::FULL_MASK, 0)});
}
NodeConfig config(inConfs, {});

supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown);
selectPrimitiveDescriptorByIndex(0);
Expand Down
Loading

0 comments on commit 416bd98

Please sign in to comment.