From 416bd9894dc4bf2570065b411bb2a69357e8bfc2 Mon Sep 17 00:00:00 2001
From: Xiping Yan
Date: Tue, 24 Dec 2024 14:51:33 +0800
Subject: [PATCH] [CPU] Whisper ReadValue optimization (#26130)

### Details:
- *New `ReadValueWithSubgraph` node.*
- *Move `ReadValue`'s initial subgraph nodes to `ReadValueWithSubgraph`.*
- *Mirror `ReadValueWithSubgraph` to `MemoryInput`.*
- *Upgrade `MemoryInput` and `MemoryInputBase` so that they support multiple inputs.*
- *Call the new `Init` and `Activate` interfaces of ov::intel_cpu::Graph to avoid memory copies. Refer: https://github.com/openvinotoolkit/openvino/pull/25385*
- *Depends on https://github.com/openvinotoolkit/openvino/pull/27189*

### Tickets:
- *128743*

---------

Signed-off-by: xipingya
Co-authored-by: Egor Duplensky
Co-authored-by: Maksim Kutakov
Co-authored-by: Maksim Kutakov
---
 src/plugins/intel_cpu/src/cpu_types.cpp       |   1 +
 src/plugins/intel_cpu/src/extension.cpp       |   2 +
 src/plugins/intel_cpu/src/graph_dumper.cpp    |   4 +
 src/plugins/intel_cpu/src/graph_optimizer.cpp |  64 ++--
 src/plugins/intel_cpu/src/nodes/input.cpp     |  10 +-
 src/plugins/intel_cpu/src/nodes/memory.cpp    | 189 ++++++++++-
 src/plugins/intel_cpu/src/nodes/memory.hpp    |  40 ++-
 .../intel_cpu/src/nodes/memory_state_base.cpp |   4 +-
 .../common/op/read_value_with_subgraph.cpp    | 114 +++++++
 .../common/op/read_value_with_subgraph.hpp    |  37 +++
 .../move_readvalue_inputs_to_subgraph.cpp     | 164 +++++++++
 .../move_readvalue_inputs_to_subgraph.hpp     |  31 ++
 .../common/pass/stateful_sdpa_fusion.cpp      |   7 +
 .../convert_to_cpu_specific_opset.hpp         |   2 +
 .../src/common/stateful_init_graph.cpp        | 314 ++++++++++++++++++
 .../transformations/readvalue_subgraph.cpp    | 232 +++++++++++++
 16 files changed, 1165 insertions(+), 50 deletions(-)
 create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.cpp
 create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.hpp
 create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.cpp
 create mode 100644 src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.hpp
 create mode 100644 src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/stateful_init_graph.cpp
 create mode 100644 src/plugins/intel_cpu/tests/unit/transformations/readvalue_subgraph.cpp

diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp
index 67c538bd78341a..865ec1f692b762 100644
--- a/src/plugins/intel_cpu/src/cpu_types.cpp
+++ b/src/plugins/intel_cpu/src/cpu_types.cpp
@@ -144,6 +144,7 @@ static const TypeToNameMap& get_type_to_name_tbl() {
         {"Loop", Type::TensorIterator},
         {"ReadValue", Type::MemoryInput},  // for construction from name ctor, arbitrary name is used
         {"Assign", Type::MemoryOutput},    // for construction from layer ctor
+        {"ReadValueWithSubgraph", Type::MemoryInput},
         {"Convert", Type::Convert},
         {"NV12toRGB", Type::ColorConvert},
         {"NV12toBGR", Type::ColorConvert},
diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp
index bdb5211009a22a..95de3720bb1e25 100644
--- a/src/plugins/intel_cpu/src/extension.cpp
+++ b/src/plugins/intel_cpu/src/extension.cpp
@@ -23,6 +23,7 @@
 #include "transformations/cpu_opset/common/op/leaky_relu.hpp"
 #include "transformations/cpu_opset/common/op/ngram.hpp"
 #include "transformations/cpu_opset/common/op/power_static.hpp"
+#include "transformations/cpu_opset/common/op/read_value_with_subgraph.hpp"
#include "transformations/cpu_opset/common/op/sdpa.hpp" #include "transformations/cpu_opset/common/op/swish_cpu.hpp" #include "transformations/cpu_opset/x64/op/interaction.hpp" @@ -78,6 +79,7 @@ class TypeRelaxedExtension : public ov::OpExtension> { OP_EXTENSION(ov::intel_cpu::SwishNode) \ OP_EXTENSION(ov::intel_cpu::SDPAWithTransposeReshape) \ OP_EXTENSION(ov::intel_cpu::NgramNode) \ + OP_EXTENSION(ov::intel_cpu::ReadValueWithSubgraph) \ OP_EXTENSION(ov::op::internal::GatherCompressed) \ OP_EXTENSION(ov::op::internal::NonMaxSuppressionIEInternal) \ OP_EXTENSION(ov::op::internal::MulticlassNmsIEInternal) \ diff --git a/src/plugins/intel_cpu/src/graph_dumper.cpp b/src/plugins/intel_cpu/src/graph_dumper.cpp index ffd58fdb162899..3cdd2f389d29f8 100644 --- a/src/plugins/intel_cpu/src/graph_dumper.cpp +++ b/src/plugins/intel_cpu/src/graph_dumper.cpp @@ -357,6 +357,10 @@ void average_counters(const Graph& graph) { * - _.csv * For example: 0_MyModel.csv */ + if (!graph.getGraphContext()) { + DEBUG_LOG("graph.m_context is null. Don't dump average_counters."); + return; + } const std::string& path = graph.getConfig().debugCaps.averageCountersPath; diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp index fe0df309dc32f1..1cab7ab7d8c60a 100644 --- a/src/plugins/intel_cpu/src/graph_optimizer.cpp +++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp @@ -2935,12 +2935,19 @@ void GraphOptimizer::MatchSdpaKvCache(Graph& graph) { auto memInputNode = std::dynamic_pointer_cast(node); OPENVINO_ASSERT(memInputNode, "MemoryInput node ", node->getName(), " has unexpected dynamic type"); - ov::optional input_shape; - ov::optional input_prc; - + ov::optional> inputShapes; + ov::optional> inputPrcs; if (!node->getParentEdges().empty()) { - input_shape = ov::optional(node->getInputShapeAtPort(0)); - input_prc = ov::optional(node->getOriginalInputPrecisionAtPort(0)); + inputShapes = ov::optional>(std::vector{}); + inputPrcs = ov::optional>(std::vector{}); + + auto& input_shape_vec = *inputShapes; + auto& input_prc_vec = *inputPrcs; + + for (size_t i = 0; i < node->getParentEdges().size(); i++) { + input_shape_vec.push_back(node->getInputShapeAtPort(i)); + input_prc_vec.push_back(node->getOriginalInputPrecisionAtPort(i)); + } } // search for SDPA @@ -2966,8 +2973,8 @@ void GraphOptimizer::MatchSdpaKvCache(Graph& graph) { memInputNode->getOutputShapeAtPort(0), memInputNode->getOriginalOutputPrecisionAtPort(0), graph.getGraphContext(), - input_shape, - input_prc, + inputShapes, + inputPrcs, sdpa); if (!memInputNode->getParentEdges().empty()) { @@ -3064,12 +3071,18 @@ void GraphOptimizer::DropRedundantMemoryOutput(Graph& graph) { auto memInputNode = std::dynamic_pointer_cast(node); OPENVINO_ASSERT(memInputNode, "MemoryInput node ", node->getName(), " has unexpected dynamic type"); - ov::optional inputShape; - ov::optional inputPrc; - + ov::optional> inputShapes; + ov::optional> inputPrcs; if (!node->getParentEdges().empty()) { - inputShape = ov::optional(node->getInputShapeAtPort(0)); - inputPrc = ov::optional(node->getOriginalInputPrecisionAtPort(0)); + inputShapes = ov::optional>(std::vector{}); + inputPrcs = ov::optional>(std::vector{}); + + auto& input_shape_vec = *inputShapes; + auto& input_prc_vec = *inputPrcs; + for (size_t i = 0; i < node->getParentEdges().size(); i++) { + input_shape_vec.push_back(node->getInputShapeAtPort(i)); + input_prc_vec.push_back(node->getOriginalInputPrecisionAtPort(i)); + } } // search for the MemoryOutputNode @@ -3086,6 +3099,10 @@ 
void GraphOptimizer::DropRedundantMemoryOutput(Graph& graph) { graph.RemoveEdge(memoryOutputNode->getParentEdgeAt(0)); // there are no output edges from MemoryOutput nodes + CPU_GRAPH_OPTIMIZER_SCOPE(DropRedundantMemoryOutput_SubGraph); + auto memInpNd = std::dynamic_pointer_cast(node); + OPENVINO_ASSERT(memInpNd, "MemoryInput node ", node->getName(), " has unexpected dynamic type"); + // now replace the existing MemoryInput with a special type that works without the corresponding MemoryOutput auto memInputSingle = std::make_shared(memInputNode->getId(), memInputNode->getName(), @@ -3093,17 +3110,24 @@ void GraphOptimizer::DropRedundantMemoryOutput(Graph& graph) { memInputNode->getOutputShapeAtPort(0), memInputNode->getOriginalOutputPrecisionAtPort(0), graph.getGraphContext(), - inputShape, - inputPrc); - + inputShapes, + inputPrcs, + memInpNd->getSubGraph()); graph.AddNode(memInputSingle); if (!memInputNode->getParentEdges().empty()) { - auto parentEdge = memInputNode->getParentEdgeAt(0); - auto parent = parentEdge->getParent(); - const auto inputNum = parentEdge->getInputNum(); - graph.RemoveEdge(parentEdge); - graph.CreateEdge(parent, memInputSingle, inputNum, 0); + auto parentEdgeNum = memInputNode->getParentEdges().size(); + std::vector parentEdges; + for (size_t i = 0; i < parentEdgeNum; i++) { + auto parentEdge = memInputNode->getParentEdgeAt(i); + auto parent = parentEdge->getParent(); + const auto inputNum = parentEdge->getInputNum(); + parentEdges.push_back(parentEdge); + graph.CreateEdge(parent, memInputSingle, inputNum, parentEdge->getOutputNum()); + } + for (auto parentEdge : parentEdges) { + graph.RemoveEdge(parentEdge); + } } for (auto&& edge : memInputNode->getChildEdgesAtPort(0)) { diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index 4bb2f714b284fd..34b659a1ef2882 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -11,6 +11,7 @@ #include "openvino/core/shape.hpp" #include "openvino/core/type/element_type.hpp" #include "shape_inference/shape_inference_pass_through.hpp" +#include "transformations/cpu_opset/common/op/read_value_with_subgraph.hpp" using namespace dnnl; using namespace dnnl::impl::cpu::x64; @@ -226,7 +227,8 @@ Input::Input(const std::shared_ptr& op, const GraphContext::CPtr conte op::v0::Constant::get_type_info_static(), op::v0::Result::get_type_info_static(), op::v3::ReadValue::get_type_info_static(), - op::v6::ReadValue::get_type_info_static())) + op::v6::ReadValue::get_type_info_static(), + ov::intel_cpu::ReadValueWithSubgraph::get_type_info_static())) OPENVINO_THROW_NOT_IMPLEMENTED("CPU Input node doesn't support ngraph operation ", op->get_type_name(), " with name ", @@ -479,7 +481,11 @@ void Input::selectOptimalPrimitiveDescriptor() { supportedPrimitiveDescriptors.clear(); // and just use parent memory descriptor for Output node to avoid reorders insertion - NodeConfig config({PortConfig(getParentOutputMemDesc(getParentEdgeAt(0)), BlockedMemoryDesc::FULL_MASK, 0)}, {}); + std::vector inConfs; + for (size_t i = 0; i < getParentEdges().size(); i++) { + inConfs.push_back({PortConfig(getParentOutputMemDesc(getParentEdgeAt(i)), BlockedMemoryDesc::FULL_MASK, 0)}); + } + NodeConfig config(inConfs, {}); supportedPrimitiveDescriptors.emplace_back(config, impl_desc_type::unknown); selectPrimitiveDescriptorByIndex(0); diff --git a/src/plugins/intel_cpu/src/nodes/memory.cpp b/src/plugins/intel_cpu/src/nodes/memory.cpp index 5a0bd7a1e3dff1..d9c9dba5a1219d 100644 
--- a/src/plugins/intel_cpu/src/nodes/memory.cpp +++ b/src/plugins/intel_cpu/src/nodes/memory.cpp @@ -10,8 +10,11 @@ #include "dnnl_extension_utils.h" #include "dnnl_types.h" #include "memory_desc/cpu_memory_desc_utils.h" +#include "nodes/common/cpu_convert.h" #include "scaled_attn.h" +#include "shape_inference/shape_inference_internal_dyn.hpp" #include "shape_inference/shape_inference_pass_through.hpp" +#include "transformations/cpu_opset/common/op/read_value_with_subgraph.hpp" #include "utils/general_utils.h" using namespace dnnl; @@ -373,8 +376,10 @@ bool MemoryInputBase::isSupportedOperation(const std::shared_ptr try { if (!one_of(op->get_type_info(), ov::op::v3::ReadValue::get_type_info_static(), - ov::op::v6::ReadValue::get_type_info_static())) { - errorMessage = "Node is not an instance of ReadValue from the operation set v3 or v6."; + ov::op::v6::ReadValue::get_type_info_static(), + ov::intel_cpu::ReadValueWithSubgraph::get_type_info_static())) { + errorMessage = "Node is not an instance of ReadValue from the operation set v3 " + "or v6, or is not an instance of intel_cpu::ReadValueWithSubgraph"; return false; } } catch (...) { @@ -402,22 +407,26 @@ MemoryInputBase::MemoryInputBase(const std::string id, const Shape& output_shape, const ov::element::Type& output_prc, const GraphContext::CPtr context, - const ov::optional& input_shape, - const ov::optional& input_prc, + const ov::optional>& input_shape, + const ov::optional>& input_prc, MemoryInputBase::mode mode) : Input(output_shape, output_prc, name, type, context), MemoryStateNode(id) { outputShapes.emplace_back(output_shape); addOriginalOutputPrecision(output_prc); if (input_shape) { - inputShapes.push_back(*input_shape); - isDynamic = isDynamic || input_shape->isDynamic(); + for (auto inp_shape : *input_shape) { + inputShapes.push_back(inp_shape); + isDynamic = isDynamic || inp_shape.isDynamic(); + } if (isDynamic && !shapeInference) { shapeInference = PassThroughShapeInferFactory().makeShapeInfer(); } } if (input_prc) { - addOriginalInputPrecision(*input_prc); + for (auto inp_prc : *input_prc) { + addOriginalInputPrecision(inp_prc); + } } if (created()) { context->getMemoryStatesRegister()->registerInput(this); @@ -456,8 +465,11 @@ void MemoryInputBase::initSupportedPrimitiveDescriptors() { NodeConfig config; if (!getParentEdges().empty()) { - const auto& inputShape = getInputShapeAtPort(0); - config.inConfs.emplace_back(descCreators.at(LayoutType::ncsp)->createSharedDesc(precision, inputShape)); + for (size_t i = 0; i < getParentEdges().size(); i++) { + const auto& inputShape = getInputShapeAtPort(i); + auto inp_prc = getOriginalInputPrecisionAtPort(i); + config.inConfs.emplace_back(descCreators.at(LayoutType::ncsp)->createSharedDesc(inp_prc, inputShape)); + } } const auto& outputShape = getOutputShapeAtPort(0); @@ -562,6 +574,47 @@ void MemoryInputBase::bypassAssignState() { return; } +MemoryInput::MemoryInput(const std::shared_ptr& op, const GraphContext::CPtr ctx) + : MemoryInputBase::MemoryInputBase(op, ctx) { + auto rvWithSubgraph = ov::as_type_ptr(op); + if (rvWithSubgraph) { + body = rvWithSubgraph->get_function(); + subGraph = make_unique(); + if (isDynamic) { + shapeInference = InternalDynShapeInferFactory().makeShapeInfer(); + } + } +} + +MemoryInput::MemoryInput(const std::string id, + const std::string& name, + const std::string& type, + const Shape& output_shape, + const ov::element::Type& output_prc, + const GraphContext::CPtr context, + const ov::optional>& input_shape, + const ov::optional>& input_prc, + 
std::shared_ptr<ov::Model> func,
+                         mode mode)
+    : MemoryInputBase::MemoryInputBase(id,
+                                       name,
+                                       type,
+                                       output_shape,
+                                       output_prc,
+                                       context,
+                                       input_shape,
+                                       input_prc,
+                                       mode) {
+    body = func;
+
+    if (haveSubgraph()) {
+        subGraph = make_unique<Graph>();
+        if (isDynamic) {
+            shapeInference = InternalDynShapeInferFactory().makeShapeInfer();
+        }
+    }
+}
+
 bool MemoryInput::needInitGraphProcessing() const {
     return !getParentEdges().empty() && getAssignedState()->is_reset_state();
 }
@@ -620,6 +673,59 @@ void MemoryInput::initOptimalPrimitiveDescriptor() {
     config.outConfs.front().setMemDesc(mem_desc);
     // bypass any checks, we enforce the child descriptor
     selectedPd->setConfig(config);
+
+    if (haveSubgraph()) {
+        // Adopt the parent configuration to avoid inserting a reorder before the MemoryInput.
+        std::vector<node::Input::InputConfig> graphInputConfig;
+
+        for (size_t i = 0; i < getParentEdges().size(); i++) {
+            auto desc = getParentOutputMemDesc(getParentEdgeAt(i));
+            graphInputConfig.emplace_back(node::Input::InputConfig{desc, true});
+        }
+
+        std::vector<node::Input::OutputConfig> graphOutputConfig;
+        for (auto&& portConfig : config.outConfs) {
+            auto desc = portConfig.getMemDesc();
+            graphOutputConfig.emplace_back(node::Input::OutputConfig{desc, true});
+        }
+
+        // configure the inner graph to get the information about output memory descriptors
+        subGraph->Init(body, context, graphInputConfig, graphOutputConfig);
+    }
+}
+
+// @todo add an ASCII diagram for memory mapping / reuse
+void MemoryInput::createPrimitive() {
+    MemoryInputBase::createPrimitive();
+    if (haveSubgraph()) {
+        OPENVINO_ASSERT(getOriginalInputsNumber() == subGraph->inputsNumber(),
+                        "Number of node inputs must be equal to the number of inner graph's inputs: ",
+                        getOriginalInputsNumber(),
+                        " != ",
+                        subGraph->inputsNumber());
+
+        std::vector<MemoryPtr> inputMemory;
+        for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
+            auto srcEdgeMem = getSrcMemoryAtPort(i);
+            // create separate input memory objects instead of sharing them, to avoid data corruption
+            auto mem = std::make_shared<Memory>(getEngine(), srcEdgeMem->getDescPtr(), srcEdgeMem->getMemoryBlock());
+            subgraphMemoryPtrs.push_back(mem);
+            inputMemory.emplace_back(std::move(mem));
+        }
+
+        OPENVINO_ASSERT(getOriginalOutputsNumber() == subGraph->outputsNumber(),
+                        "Number of node outputs must be equal to the number of inner graph's outputs: ",
+                        getOriginalOutputsNumber(),
+                        " != ",
+                        subGraph->outputsNumber());
+
+        std::vector<MemoryPtr> outputMemory;
+        for (size_t i = 0; i < getOriginalOutputsNumber(); i++) {
+            outputMemory.emplace_back(getDstMemoryAtPort(i));
+        }
+
+        subGraph->Activate(inputMemory, outputMemory);
+    }
 }
 
 void MemoryInput::runDynamic(dnnl::stream strm) {
@@ -655,13 +761,43 @@ void MemoryInput::runDynamic(dnnl::stream strm) {
         memBlock->reset();
     }
 
-    // reshape output
-    const auto& newDims = processInitGraph ? getSrcMemoryAtPort(0)->getStaticDims() : stateDims;
+    MemoryPtr src = assignedMem;  // declare src memory
+    if (processInitGraph) {
+        if (haveSubgraph()) {
+            // prepare params here in runDynamic, because the init graph is not executed every time
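+            // subgraphMemoryPtrs share the input edges' memory blocks (they were created on top of
+            // them in createPrimitive), so refreshing the descriptors here is enough to propagate
+            // the new input shapes into the inner graph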
+            for (size_t i = 0; i < getOriginalInputsNumber(); i++) {
+                // since the external and internal descriptors are compatible, we may pass the descriptor
+                subgraphMemoryPtrs[i]->redefineDesc(getSrcMemoryAtPort(i)->getDescPtr());
+            }
+
+            subGraph->ResetInferCount();
+            subGraph->Infer();
+            // depending on the memory sharing solution, we can return here if the memory is substituted from the
+            // external graph, or override the src pointer with the memory pointer pointing to the subgraph output
+            // memory
+            OPENVINO_ASSERT(subGraph->outputsNumber() == 1);
+            src = subGraph->getOutputNodeByIndex(0)->getSrcMemoryAtPort(0);
+
+            // since the shape inference (InternalDynShapeInfer, which does nothing) has been performed, the memory
+            // of the extra child edges attached to the output ports has to be updated after the inference of the
+            // inner graph finishes
+            auto& childEdges = getChildEdges();
+            for (size_t j = 1; j < childEdges.size(); j++) {
+                auto& childEdge = childEdges[j];
+                auto childEdgePtr = childEdge.lock();
+                assert(childEdgePtr);
+                assert(0 == childEdgePtr->getInputNum());
+                childEdgePtr->getMemoryPtr()->redefineDesc(src->getDescPtr());
+            }
+        } else {
+            src = getSrcMemoryAtPort(0);
+        }
+    }
 
+    // reshape output
+    const auto& newDims = src->getStaticDims();
     redefineOutputMemory(0, newDims);
 
     // copy data when necessary
-    auto src = processInitGraph ? getSrcMemoryAtPort(0) : assignedMem;
     if (src->getData() != dst->getData()) {
         dst->load(*src);
     }
@@ -692,10 +828,21 @@ void MemoryInput::runStatic(dnnl::stream strm) {
         memBlock->reset();
     }
 
-    const auto processInitGraph = needInitGraphProcessing();
+    const bool processInitGraph = needInitGraphProcessing();
+    MemoryPtr src = assignedMem;  // declare src memory
+    if (processInitGraph) {
+        if (haveSubgraph()) {
+            subGraph->ResetInferCount();
+            subGraph->Infer();
+
+            OPENVINO_ASSERT(subGraph->outputsNumber() == 1);
+            src = subGraph->getOutputNodeByIndex(0)->getSrcMemoryAtPort(0);
+        } else {
+            src = getSrcMemoryAtPort(0);
+        }
+    }
 
     // copy data when necessary
-    auto src = processInitGraph ?
getSrcMemoryAtPort(0) : assignedMem; auto dst = getDstMemoryAtPort(0); if (src->getData() != dst->getData()) { dst->load(*src); @@ -749,6 +896,10 @@ MemStatePtr MemoryInput::makeState() const { original_desc); } +std::shared_ptr MemoryInput::getSubGraph() { + return body; +} + bool MemoryInput::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { return MemoryInputBase::isSupportedOperation(op, errorMessage); } @@ -759,8 +910,8 @@ MemoryInputSDPA::MemoryInputSDPA(const std::string id, const Shape& output_shape, const ov::element::Type& output_prc, const GraphContext::CPtr context, - const ov::optional& input_shape, - const ov::optional& input_prc, + const ov::optional>& input_shape, + const ov::optional>& input_prc, const std::shared_ptr& sdpaNode) : MemoryInputBase(id, name, type, output_shape, output_prc, context, input_shape, input_prc), m_sdpaNode(sdpaNode) {} @@ -865,8 +1016,9 @@ MemoryInputSingle::MemoryInputSingle(const std::string id, const Shape& output_shape, const ov::element::Type& output_prc, const GraphContext::CPtr context, - const ov::optional& input_shape, - const ov::optional& input_prc) + const ov::optional>& input_shape, + const ov::optional>& input_prc, + std::shared_ptr func) : MemoryInput(id, name, type, @@ -875,6 +1027,7 @@ MemoryInputSingle::MemoryInputSingle(const std::string id, context, input_shape, input_prc, + func, MemoryInputBase::mode::single_read_value) {} MemStatePtr MemoryInputSingle::makeState() const { diff --git a/src/plugins/intel_cpu/src/nodes/memory.hpp b/src/plugins/intel_cpu/src/nodes/memory.hpp index 9c0c9664ce8a27..1d40849b0f3356 100644 --- a/src/plugins/intel_cpu/src/nodes/memory.hpp +++ b/src/plugins/intel_cpu/src/nodes/memory.hpp @@ -4,6 +4,8 @@ #pragma once +#include + #include #include "input.h" @@ -162,8 +164,8 @@ class MemoryInputBase : public Input, public MemoryStateNode { const Shape& output_shape, const ov::element::Type& output_prc, const GraphContext::CPtr context, - const ov::optional& input_shape, - const ov::optional& input_prc, + const ov::optional>& input_shape, + const ov::optional>& input_prc, mode mode = mode::read_value_assign); protected: @@ -192,15 +194,30 @@ class MemoryInputBase : public Input, public MemoryStateNode { class MemoryInput : public MemoryInputBase { public: - using MemoryInputBase::MemoryInputBase; + MemoryInput(const std::shared_ptr& op, const GraphContext::CPtr ctx); + MemoryInput(const std::string id, + const std::string& name, + const std::string& type, + const Shape& output_shape, + const ov::element::Type& output_prc, + const GraphContext::CPtr context, + const ov::optional>& input_shape, + const ov::optional>& input_prc, + std::shared_ptr func = nullptr, + mode mode = mode::read_value_assign); + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; void initOptimalPrimitiveDescriptor() override; void resolveInPlaceEdges(Edge::LOOK look) override; + void createPrimitive() override; + MemStatePtr makeState() const override; + std::shared_ptr getSubGraph(); + protected: bool needInitGraphProcessing() const; void runStatic(dnnl::stream strm) override; @@ -210,7 +227,15 @@ class MemoryInput : public MemoryInputBase { void assignStateHook() override { /*pass*/ } + bool haveSubgraph() const { + return body != nullptr; + } + private: + std::shared_ptr body = nullptr; + std::unique_ptr subGraph = nullptr; + std::vector subgraphMemoryPtrs; + ProxyMemoryBlockPtr memBlock = nullptr; }; @@ -222,8 +247,9 @@ class MemoryInputSingle : 
public MemoryInput {
                      const Shape& output_shape,
                      const ov::element::Type& output_prc,
                      const GraphContext::CPtr context,
-                     const ov::optional<Shape>& input_shape,
-                     const ov::optional<ov::element::Type>& input_prc);
+                     const ov::optional<std::vector<Shape>>& input_shape,
+                     const ov::optional<std::vector<ov::element::Type>>& input_prc,
+                     std::shared_ptr<ov::Model> func);
 
     static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
@@ -242,8 +268,8 @@ class MemoryInputSDPA : public MemoryInputBase {
                     const Shape& output_shape,
                     const ov::element::Type& output_prc,
                     const GraphContext::CPtr context,
-                    const ov::optional<Shape>& input_shape,
-                    const ov::optional<ov::element::Type>& input_prc,
+                    const ov::optional<std::vector<Shape>>& input_shape,
+                    const ov::optional<std::vector<ov::element::Type>>& input_prc,
                     const std::shared_ptr<ScaledDotProductAttention>& sdpaNode);
 
     static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
diff --git a/src/plugins/intel_cpu/src/nodes/memory_state_base.cpp b/src/plugins/intel_cpu/src/nodes/memory_state_base.cpp
index 58d855a091d716..19d4863c3afbcb 100644
--- a/src/plugins/intel_cpu/src/nodes/memory_state_base.cpp
+++ b/src/plugins/intel_cpu/src/nodes/memory_state_base.cpp
@@ -11,10 +11,8 @@ using namespace ov::intel_cpu::node;
 
 MemoryNode::MemoryNode(const std::shared_ptr<ov::Node>& op) {
-    if (auto assignOp = ov::as_type_ptr<ov::op::util::AssignBase>(op)) {
+    if (auto assignOp = std::dynamic_pointer_cast<ov::op::util::VariableExtension>(op)) {
         m_id = assignOp->get_variable_id();
-    } else if (auto readValueOp = ov::as_type_ptr<ov::op::util::ReadValueBase>(op)) {
-        m_id = readValueOp->get_variable_id();
     } else {
         OPENVINO_THROW("Unexpected ov::Node type: ", op->get_type_info().name, " in MemoryNode");
     }
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.cpp
new file mode 100644
index 00000000000000..39df4b6a29c099
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.cpp
@@ -0,0 +1,114 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+#include "read_value_with_subgraph.hpp"
+
+#include "itt.hpp"
+#include "transformations/itt.hpp"
+
+ov::intel_cpu::ReadValueWithSubgraph::ReadValueWithSubgraph(const std::shared_ptr<ov::op::util::Variable>& variable,
+                                                            std::shared_ptr<ov::Model> body) {
+    m_variable = variable;
+    set_function(body);
+}
+
+ov::intel_cpu::ReadValueWithSubgraph::ReadValueWithSubgraph(const std::shared_ptr<ov::op::util::Variable>& variable,
+                                                            std::shared_ptr<ov::Model> body,
+                                                            const OutputVector& args)
+    : ReadValueWithSubgraph(variable, body) {
+    set_arguments(args);
+}
+
+std::string ov::intel_cpu::ReadValueWithSubgraph::get_variable_id() const {
+    OPENVINO_ASSERT(m_variable, "Variable is not initialized. Variable_id is unavailable");
+    return get_variable()->get_info().variable_id;
+}
+
+void ov::intel_cpu::ReadValueWithSubgraph::set_input(const Output<Node>& value,
+                                                     const std::shared_ptr<ov::op::v0::Parameter>& body_parameter) {
+    OPENVINO_ASSERT(body_parameter != nullptr, "Missing parameter! The parameter is nullptr!");
+    auto param_index = m_bodies[0]->get_parameter_index(body_parameter);
+
+    OPENVINO_ASSERT(param_index != -1, "Missing parameter ", body_parameter->get_friendly_name(), " for \'body\'!");
+
+    set_invariant_inputs(value, {body_parameter});
+}
+
+ov::Output<ov::Node> ov::intel_cpu::ReadValueWithSubgraph::set_output(
+    const std::shared_ptr<ov::op::v0::Result>& body_result) {
+    OPENVINO_ASSERT(body_result != nullptr, "Incorrect result in \"body\"! Result can't be \'nullptr\'");
+    auto result_id = m_bodies[0]->get_result_index(body_result);
+
+    OPENVINO_ASSERT(result_id != -1, "Missing result ", body_result->get_friendly_name(), " in \'body\'!");
+
+    return set_body_outputs({body_result});
+}
+
+std::shared_ptr<ov::Node> ov::intel_cpu::ReadValueWithSubgraph::clone_with_new_inputs(
+    const OutputVector& new_args) const {
+    INTERNAL_OP_SCOPE(intel_cpu_ReadValueWithSubgraphNode_clone_with_new_inputs);
+
+    check_new_args_count(this, new_args);
+    auto op =
+        std::make_shared<ReadValueWithSubgraph>(this->get_variable(), get_function()->clone(), new_args);
+    OPENVINO_ASSERT(op != nullptr,
+                    "Cannot clone ",
+                    description(),
+                    " operation with name ",
+                    get_friendly_name());
+    op->set_output_size(m_output_descriptions[0].size());
+    for (const auto& m_input_descr : m_input_descriptions[0]) {
+        op->m_input_descriptions[0].push_back(m_input_descr->copy());
+    }
+    for (const auto& m_output_descr : m_output_descriptions[0]) {
+        op->m_output_descriptions[0].push_back(m_output_descr->copy());
+    }
+    op->validate_and_infer_types();
+    return op;
+}
+
+bool ov::intel_cpu::ReadValueWithSubgraph::visit_attributes(AttributeVisitor& visitor) {
+    INTERNAL_OP_SCOPE(intel_cpu_ReadValueWithSubgraphNode_visit_attributes);
+    visitor.on_attribute("variable_id", m_variable);
+
+    auto variable_info = m_variable->get_info();
+    visitor.on_attribute("variable_type", variable_info.data_type);
+    visitor.on_attribute("variable_shape", variable_info.data_shape);
+    m_variable->update(variable_info);
+
+    visitor.on_attribute("body", m_bodies[0]);
+    visitor.on_attribute("inputs", m_input_descriptions[0]);
+    visitor.on_attribute("outputs", m_output_descriptions[0]);
+    return true;
+}
+
+void ov::intel_cpu::ReadValueWithSubgraph::validate_and_infer_types() {
+    INTERNAL_OP_SCOPE(intel_cpu_ReadValueWithSubgraphNode_validate_and_infer_types);
+
+    NODE_VALIDATION_CHECK(this,
+                          m_bodies.size() == 1,
+                          "ReadValueWithSubgraph contains incorrect number of bodies:",
+                          m_bodies.size());
+
+    validate_and_infer_type_body(get_function(), m_input_descriptions[0]);
+
+    auto output_nodes = outputs();
+
+    auto outputs_map = get_mapping_outputs_on_body_description(m_output_descriptions[0]);
+
+    // Checking each output
+    for (size_t output_index = 0; output_index < output_nodes.size(); ++output_index) {
+        NODE_VALIDATION_CHECK(this,
+                              outputs_map.count(output_index) != 0,
+                              "Incorrect association in body! Output ",
+                              output_index,
+                              " is not associated with results in \'body\'!");
+
+        auto desc = outputs_map.at(output_index);
+
+        auto node_result = m_bodies[0]->get_results().at(desc->m_body_value_index)->input_value(0);
+
+        set_output_type(output_index, node_result.get_element_type(), node_result.get_partial_shape());
+    }
+}
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.hpp
new file mode 100644
index 00000000000000..037f8eb302afcd
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/op/read_value_with_subgraph.hpp
@@ -0,0 +1,37 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/op/op.hpp"
+#include "openvino/op/read_value.hpp"
+#include "openvino/op/util/sub_graph_base.hpp"
+#include "transformations/cpu_opset/common/op/submodel.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+class ReadValueWithSubgraph : public ov::op::util::SubGraphOp, public ov::op::util::VariableExtension {
+public:
+    OPENVINO_OP("ReadValueWithSubgraph", "cpu_plugin_opset");
+
+    ReadValueWithSubgraph() = default;
+    ReadValueWithSubgraph(const std::shared_ptr<ov::op::util::Variable>& variable, std::shared_ptr<ov::Model> body);
+    ReadValueWithSubgraph(const std::shared_ptr<ov::op::util::Variable>& variable,
+                          std::shared_ptr<ov::Model> body,
+                          const OutputVector& args);
+
+    std::string get_variable_id() const override;
+
+    void set_input(const Output<Node>& value, const std::shared_ptr<ov::op::v0::Parameter>& body_parameter);
+
+    Output<Node> set_output(const std::shared_ptr<ov::op::v0::Result>& body_result);
+
+    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
+    bool visit_attributes(AttributeVisitor& visitor) override;
+    void validate_and_infer_types() override;
+};
+
+}  // namespace intel_cpu
+}  // namespace ov
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.cpp
new file mode 100644
index 00000000000000..e2b283e65c8615
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.cpp
@@ -0,0 +1,164 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "move_readvalue_inputs_to_subgraph.hpp"
+
+#include
+
+#include "itt.hpp"
+#include "openvino/core/rt_info.hpp"
+#include "openvino/pass/constant_folding.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "ov_ops/rotary_positional_embeddings.hpp"
+#include "transformations/cpu_opset/common/op/read_value_with_subgraph.hpp"
+#include "transformations/cpu_opset/common/op/sdpa.hpp"
+#include "transformations/cpu_opset/common/op/submodel.hpp"
+#include "transformations/rt_info/disable_fp16_compression.hpp"
+#include "transformations/utils/gen_pattern.hpp"
+#include "transformations/utils/utils.hpp"
+
+ov::intel_cpu::MoveReadValueInputsToSubgraph::MoveReadValueInputsToSubgraph() {
+    MATCHER_SCOPE(MoveReadValueInputsToSubgraph);
+    using namespace ov::pass::pattern;
+
+    auto readvalue_pattern = pass::pattern::wrap_type<ov::op::v6::ReadValue>();
+
+    ov::matcher_pass_callback callback = [=](Matcher& m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+        auto readvalue = as_type_ptr<ov::op::v6::ReadValue>(pattern_map.at(readvalue_pattern).get_node_shared_ptr());
+        if (!readvalue || readvalue->get_input_size() != 1u) {
+            return false;
+        }
+
+        if
(readvalue->get_rt_info().count("DisableInitSubgraphFusing") &&
+            readvalue->get_rt_info()["DisableInitSubgraphFusing"].as<bool>()) {
+            return false;
+        }
+
+        NodeVector subgraph_nodes;
+        std::unordered_set<std::shared_ptr<ov::Node>> visited_path_to_output;  // Cache nodes which connect to an Output.
+        std::unordered_set<std::shared_ptr<ov::Node>> visited_path_to_rv;      // Cache nodes which connect to the ReadValue.
+        NodeVector inputs = {};
+        OutputVector outputs = {};
+
+        // DFS: check whether the current node's only final successor is the ReadValue.
+        std::function<void(std::shared_ptr<ov::Node>, bool&)> dfs = [&](std::shared_ptr<ov::Node> node,
+                                                                        bool& found_output) {
+            if (found_output) {
+                return;
+            }
+
+            if (visited_path_to_output.find(node) != visited_path_to_output.end()) {
+                found_output = true;
+                return;
+            }
+
+            if (visited_path_to_rv.find(node) != visited_path_to_rv.end()) {
+                return;
+            }
+
+            // node is an output (it has no consumers)
+            if (node->get_output_target_inputs(0).size() == 0u) {
+                found_output = true;
+                return;
+            }
+
+            bool any_child_on_output_path = false;
+            for (const auto& child : node->get_output_target_inputs(0)) {
+                auto son = child.get_node()->shared_from_this();
+                if (son == readvalue) {
+                    continue;
+                }
+
+                bool new_found_output = false;
+                dfs(son, new_found_output);
+                if (new_found_output) {
+                    any_child_on_output_path = true;
+                }
+            }
+
+            if (any_child_on_output_path) {
+                visited_path_to_output.insert(node);
+                found_output = any_child_on_output_path;
+            }
+        };
+
+        std::function<void(std::shared_ptr<ov::Node>)> reverse_dfs = [&](std::shared_ptr<ov::Node> node) {
+            if (visited_path_to_output.find(node) != visited_path_to_output.end()) {
+                inputs.emplace_back(node);
+                return;
+            }
+
+            if (visited_path_to_rv.find(node) != visited_path_to_rv.end()) {
+                return;
+            }
+
+            if (ov::op::util::is_parameter(node)) {
+                inputs.emplace_back(node);
+                return;
+            }
+
+            // Check whether the current node has a path (bypassing the ReadValue node) to an Output node
+            // via the dfs algorithm.
+            bool found_output = false;  // Flag: an Output node was found
+            dfs(node, found_output);
+
+            if (found_output) {
+                inputs.emplace_back(node);
+                visited_path_to_output.insert(node);
+                return;
+            }
+
+            visited_path_to_rv.insert(node);
+
+            // Cache to subgraph_nodes
+            subgraph_nodes.emplace_back(node);
+
+            for (size_t i = 0; i < node->get_input_size(); i++) {
+                reverse_dfs(node->get_input_node_shared_ptr(i));
+            }
+        };
+
+        // Reverse DFS from the ReadValue: find all suitable nodes and move them to subgraph_nodes.
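+        // A node is absorbed into the subgraph only if all of its paths end at this ReadValue;
+        // any node that also reaches a model output stays outside and becomes a subgraph input.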
+        reverse_dfs(readvalue->get_input_node_shared_ptr(0));
+
+        if (inputs.size() == 0 || subgraph_nodes.size() == 0) {
+            return false;
+        }
+
+        // Subgraph's inputs
+        auto params = ParameterVector{};
+        for (auto inp : inputs) {
+            auto param =
+                std::make_shared<ov::op::v0::Parameter>(inp->get_element_type(), inp->get_output_partial_shape(0));
+            params.push_back(param);
+            for (const auto& child : inp->get_output_target_inputs(0)) {
+                auto it = std::find(subgraph_nodes.begin(), subgraph_nodes.end(), child.get_node()->shared_from_this());
+                if (it != subgraph_nodes.end()) {
+                    child.replace_source_output(param);
+                }
+            }
+        }
+
+        // Subgraph's output
+        auto last_node = readvalue->get_input_node_shared_ptr(0);
+        auto output = std::make_shared<ov::op::v0::Result>(last_node);
+        auto func = std::make_shared<ov::Model>(ov::ResultVector({output}), params, "state_init_submodel");
+
+        auto new_rv = std::make_shared<ov::intel_cpu::ReadValueWithSubgraph>(readvalue->get_variable(), func);
+
+        for (size_t i = 0; i < inputs.size(); i++) {
+            new_rv->set_input(inputs[i]->output(0), params[i]);
+        }
+        new_rv->set_output(output);
+
+        // Replace the ReadValue with ov::intel_cpu::ReadValueWithSubgraph
+        ov::replace_node(readvalue, new_rv);
+        ov::copy_runtime_info(subgraph_nodes, new_rv);
+        new_rv->validate_and_infer_types();
+        return true;
+    };
+
+    auto m = std::make_shared<Matcher>(readvalue_pattern, matcher_name);
+    this->register_matcher(m, callback);
+}
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.hpp
new file mode 100644
index 00000000000000..220003cc83ead1
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_readvalue_inputs_to_subgraph.hpp
@@ -0,0 +1,31 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/graph_rewrite.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace intel_cpu {
+
+/**
+ * @brief Move ReadValue's inputs inside the new CPU ngraph node: ReadValueWithSubgraph op.
+ *
+ *              input1
+ *                |
+ *   Some nodes (they have only one
+ *   common successor, the ReadValue)                     input1
+ *                |                                         |
+ *            ReadValue          ------->       ReadValueWithSubgraph (the subgraph is inside)
+ *            /       \                                 /       \
+ *        Assign     others                         Assign     others
+ */
+
+class MoveReadValueInputsToSubgraph : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("MoveReadValueInputsToSubgraph", "0");
+    MoveReadValueInputsToSubgraph();
+};
+
+}  // namespace intel_cpu
+}  // namespace ov
\ No newline at end of file
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp
index 0ec2049d1ccc1c..447adb0b2fe23f 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/stateful_sdpa_fusion.cpp
@@ -270,6 +270,13 @@ StatefulSDPAFusion::StatefulSDPAFusion() {
         else
             assign_v_node->set_arguments({new_node->output(2)});
 
+        // Mark up the pattern
+        // ReadValue->Convert(optional)->ScaledDotProductAttentionWithKVCache->Convert(optional)->Assign, so that
+        // the ReadValue in it is not replaced with ReadValueWithSubgraph.
+        // TODO: temporarily skip this pattern; if MemoryInputSDPA supports a subgraph in the future, this markup
+        // may be deleted.
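+        // (MoveReadValueInputsToSubgraph checks this flag and leaves such ReadValue nodes untouched.)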
+        past_k_node->get_rt_info()["DisableInitSubgraphFusing"] = true;
+        past_v_node->get_rt_info()["DisableInitSubgraphFusing"] = true;
+
         return true;
     };
 
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
index 614f7d690f8726..5142ee319ac523 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
@@ -10,6 +10,7 @@
 #include "common/pass/convert_to_swish_cpu.hpp"
 #include "common/pass/fc_bias_fusion.hpp"
 #include "common/pass/move_fc_reshape_to_weights.hpp"
+#include "common/pass/move_readvalue_inputs_to_subgraph.hpp"
 #include "common/pass/rnn_sequences_optimization.hpp"
 #include "config.h"
 #include "itt.hpp"
@@ -70,6 +71,7 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ov::Model>& model, const C
                                       false);
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::Validate);
     CPU_REGISTER_PASS_COMMON(manager, ov::pass::EliminateConvert);  // Need to clean up after the ConvertPrecision.
+    CPU_REGISTER_PASS_COMMON(manager, MoveReadValueInputsToSubgraph);
 
     manager.run_passes(model);
 }
diff --git a/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/stateful_init_graph.cpp b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/stateful_init_graph.cpp
new file mode 100644
index 00000000000000..9186b43d3d863e
--- /dev/null
+++ b/src/plugins/intel_cpu/tests/functional/custom/subgraph_tests/src/common/stateful_init_graph.cpp
@@ -0,0 +1,314 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include
+
+#include "common_test_utils/ov_tensor_utils.hpp"
+#include "utils/cpu_test_utils.hpp"
+
+using namespace ov::test;
+using namespace CPUTestUtils;
+using InitGraphStatefulModelTestParams = std::tuple<std::vector<InputShape>,  // input shapes
+                                                    bool>;                    // ReadValue/Assign direct pair or not
+class InitGraphStatefulModelBase : virtual public ov::test::SubgraphBaseTest,
+                                   public testing::WithParamInterface<InitGraphStatefulModelTestParams>,
+                                   public CPUTestsBase {
+public:
+    static std::string getTestCaseName(const testing::TestParamInfo<InitGraphStatefulModelTestParams>& obj) {
+        std::ostringstream result;
+
+        std::vector<InputShape> inputShapes;
+        bool directPair;
+        std::tie(inputShapes, directPair) = obj.param;
+
+        result << "IS=";
+        for (const auto& shape : inputShapes) {
+            result << ov::test::utils::partialShape2str({shape.first}) << "_";
+        }
+        result << "TS=";
+        for (const auto& shape : inputShapes) {
+            result << "(";
+            if (!shape.second.empty()) {
+                for (const auto& itr : shape.second) {
+                    result << ov::test::utils::vec2str(itr);
+                }
+            }
+            result << ")";
+        }
+        result << "_DirectAssign=" << ov::test::utils::bool2str(directPair);
+
+        return result.str();
+    }
+
+    std::vector<ov::Tensor> calculate_refs() override {
+        for (const auto& param : functionRefs->get_parameters()) {
+            inferRequestRef.set_tensor(param->get_default_output(), inputs.at(matched_parameters[param]));
+        }
+        inferRequestRef.infer();
+
+        auto outputs = std::vector<ov::Tensor>{};
+        for (const auto& output : functionRefs->outputs()) {
+            outputs.push_back(inferRequestRef.get_tensor(output));
+        }
+
+        return outputs;
+    }
+
+    std::vector<ov::Tensor> get_plugin_outputs() override {
+        for (const auto& input : inputs) {
+            inferRequest.set_tensor(input.first, input.second);
+        }
+        inferRequest.infer();
+        auto outputs = std::vector<ov::Tensor>{};
+        for (const auto& output : function->outputs()) {
+            outputs.push_back(inferRequest.get_tensor(output));
+        }
+
+        return outputs;
+    }
+
+    void run() override {
+        prepare();
+
+        auto&& states = inferRequest.query_state();
+        auto&& refStates = inferRequestRef.query_state();
+
+        for (size_t i = 0; i < targetStaticShapes.size(); i++) {
+            for (auto iters = 0; iters < 5; iters++) {
+                generate_inputs(targetStaticShapes[i]);
+
+                if (iters & 0x1) {
+                    states.front().reset();
+                    refStates.front().reset();
+                } else {
+                    // generate and set state tensors every even iteration
+                    using ov::test::utils::InputGenerateData;
+
+                    auto stateShape = get_state_shape(i);
+                    auto tensor = utils::create_and_fill_tensor(statePrc,
+                                                                stateShape,
+                                                                InputGenerateData{0, 1, 1, iters});
+                    states.front().set_state(tensor);
+                    refStates.front().set_state(tensor);
+                }
+
+                validate();
+            }
+        }
+    }
+
+protected:
+    virtual void check_init_graph_node() = 0;
+
+    virtual ov::Shape get_state_shape(size_t i) = 0;
+
+    void prepare() {
+        compile_model();
+
+        inferRequest = compiledModel.create_infer_request();
+        ASSERT_TRUE(inferRequest);
+
+        check_init_graph_node();
+
+        // reference model
+        functionRefs = function->clone();
+
+        matched_parameters.clear();
+        const auto& ref_params = functionRefs->get_parameters();
+        const auto& params = function->get_parameters();
+        for (size_t in_idx = 0; in_idx < params.size(); ++in_idx) {
+            matched_parameters.insert({ref_params[in_idx], params[in_idx]});
+        }
+
+        auto compiledModelRef = core->compile_model(functionRefs, ov::test::utils::DEVICE_TEMPLATE);
+        inferRequestRef = compiledModelRef.create_infer_request();
+    }
+
+    std::vector<InputShape> inputShapes;
+    const ov::element::Type netPrc = ElementType::f32;
+    ov::InferRequest inferRequestRef;
+    ov::element::Type statePrc;
+};
+
+// ReadValue/Assign direct pair
+//
+//    input_1   input_2
+//       |        /
+//     Add_1     /
+//        \     /
+//        MatMul
+//           |
+//  input_0  ReadValue ..........
+//      \    /      \          .
+//      Add_0      Assign ......
+//        |
+//      Result
+
+class InitGraphStatefulModel : public InitGraphStatefulModelBase {
+public:
+    void SetUp() override {
+        targetDevice = utils::DEVICE_CPU;
+
+        bool directPair;
+        std::tie(inputShapes, directPair) = this->GetParam();
+
+        init_input_shapes(inputShapes);
+        ov::ParameterVector input_params;
+        for (auto&& shape : inputDynamicShapes) {
+            input_params.push_back(std::make_shared<ov::op::v0::Parameter>(netPrc, shape));
+        }
+
+        input_params[0]->set_friendly_name("input_0");
+        input_params[1]->set_friendly_name("input_1");
+        input_params[2]->set_friendly_name("input_2");
+
+        // init_graph
+        auto add_1 =
+            std::make_shared<ov::op::v1::Add>(input_params[1], ov::op::v0::Constant::create(netPrc, {1}, {1.0f}));
+        add_1->set_friendly_name("init_graph/add_1");
+        auto mm_0 = std::make_shared<ov::op::v0::MatMul>(add_1, input_params[2]);
+        mm_0->set_friendly_name("init_graph/mm_0");
+
+        const std::string variable_name("var_direct_pair");
+        statePrc = netPrc;
+        auto variable = std::make_shared<ov::op::util::Variable>(
+            ov::op::util::VariableInfo{{inputDynamicShapes[1][0], inputDynamicShapes[2][1]}, statePrc, variable_name});
+
+        auto read = std::make_shared<ov::op::v6::ReadValue>(mm_0, variable);
+        std::shared_ptr<ov::Node> add_0 = std::make_shared<ov::op::v1::Add>(input_params[0], read);
+        add_0->set_friendly_name("add_0");
+        auto assign = std::make_shared<ov::op::v6::Assign>(directPair ? read : add_0, variable);
+        auto res = std::make_shared<ov::op::v0::Result>(add_0);
+        function = std::make_shared<ov::Model>(ov::ResultVector({res}), ov::SinkVector({assign}), input_params);
+    }
+
+    void check_init_graph_node() override {
+        // The nodes with friendly names "init_graph/add_1" and "init_graph/mm_0" should be moved into the subgraph.
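+        // Note: "Add" maps to the CPU "Eltwise" node type, so counting zero "Add" and "MatMul"
+        // nodes confirms that the init graph was absorbed by ReadValueWithSubgraph.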
+        CheckNumberOfNodesWithType(compiledModel, "Add", 0);
+        CheckNumberOfNodesWithType(compiledModel, "MatMul", 0);
+    }
+
+    ov::Shape get_state_shape(size_t i) override {
+        return ov::Shape({inputShapes[1].second[i][0], inputShapes[2].second[i][1]});
+    }
+};
+
+TEST_P(InitGraphStatefulModel, CompareWithRefs) {
+    run();
+}
+
+// The ReadValueWithSubgraph's input and output have different precisions.
+//
+//   input[fp32]
+//       |
+//   Convert[fp32->fp16]
+//       |
+//   ReadValue ..........
+//    /      \          .
+//  Add     Assign ......
+//   |
+// Result
+class InitGraphStatefulDiffPrimitiveModel : public InitGraphStatefulModelBase {
+public:
+    void SetUp() override {
+        targetDevice = utils::DEVICE_CPU;
+
+        configuration.insert({"SNIPPETS_MODE", "DISABLE"});
+
+        bool directPair;
+        std::tie(inputShapes, directPair) = this->GetParam();
+
+        init_input_shapes(inputShapes);
+        ov::ParameterVector input_params;
+        for (auto&& shape : inputDynamicShapes) {
+            input_params.push_back(std::make_shared<ov::op::v0::Parameter>(netPrc, shape));
+        }
+
+        input_params[0]->set_friendly_name("input");
+
+        // init_graph
+        auto convert = std::make_shared<ov::op::v0::Convert>(input_params[0], ov::element::f16);
+        convert->set_friendly_name("init_graph/convert");
+
+        const std::string variable_name("var_diff_precision");
+        statePrc = ov::element::f16;
+        auto variable = std::make_shared<ov::op::util::Variable>(
+            ov::op::util::VariableInfo{{inputDynamicShapes[0]}, statePrc, variable_name});
+
+        auto readvalue = std::make_shared<ov::op::v6::ReadValue>(convert, variable);
+
+        std::shared_ptr<ov::Node> add =
+            std::make_shared<ov::op::v1::Add>(readvalue, ov::op::v0::Constant::create(ov::element::f16, {1}, {1.0f}));
+
+        auto assign = std::make_shared<ov::op::v6::Assign>(directPair ? readvalue : add, variable);
+
+        auto res = std::make_shared<ov::op::v0::Result>(add);
+
+        function = std::make_shared<ov::Model>(ov::ResultVector({res}), ov::SinkVector({assign}), input_params);
+    }
+
+    void check_init_graph_node() override {
+        CheckNumberOfNodesWithType(compiledModel, "Convert", 1);
+    }
+
+    ov::Shape get_state_shape(size_t i) override {
+        return inputShapes[0].second[i];
+    }
+};
+
+TEST_P(InitGraphStatefulDiffPrimitiveModel, CompareWithRefs) {
+    run();
+}
+
+namespace {
+const std::vector<std::vector<InputShape>> inputShapes = {
+    {
+        // Dynamic shape.
+        {{1, -1}, {{1, 2}, {1, 2}, {1, 1}}},
+        {{2, -1}, {{2, 3}, {2, 10}, {2, 1}}},
+        {{-1, 2}, {{3, 2}, {10, 2}, {1, 2}}},
+    },
+    {
+        // Static shape.
+        {{1, 1}, {{1, 1}}},
+        {{4, 2}, {{4, 2}}},
+        {{2, 10}, {{2, 10}}},
+    }
+};
+
+const std::vector<bool> readValueAssginDirectPair = {true, false};
+
+const auto testParams_smoke = ::testing::Combine(
+    ::testing::ValuesIn(inputShapes),
+    ::testing::ValuesIn(readValueAssginDirectPair));
+
+INSTANTIATE_TEST_SUITE_P(smoke_StatefulInitGraph,
+                         InitGraphStatefulModel,
+                         testParams_smoke,
+                         InitGraphStatefulModel::getTestCaseName);
+
+const std::vector<std::vector<InputShape>> inputShapesDiffPrecision = {
+    {
+        // Dynamic shape.
+        {{1, -1}, {{1, 10}, {1, 1}}},
+    },
+    {
+        // Static shape.
+ {{1, 1}, {{1, 1}}}, + } +}; + +const auto testParamsDiffPrecision_smoke = ::testing::Combine( + ::testing::ValuesIn(inputShapesDiffPrecision), + ::testing::ValuesIn(readValueAssginDirectPair)); + +INSTANTIATE_TEST_SUITE_P(smoke_StatefulInitGraph, + InitGraphStatefulDiffPrimitiveModel, + testParamsDiffPrecision_smoke, + InitGraphStatefulDiffPrimitiveModel::getTestCaseName); + +} // namespace + diff --git a/src/plugins/intel_cpu/tests/unit/transformations/readvalue_subgraph.cpp b/src/plugins/intel_cpu/tests/unit/transformations/readvalue_subgraph.cpp new file mode 100644 index 00000000000000..3656130b579edd --- /dev/null +++ b/src/plugins/intel_cpu/tests/unit/transformations/readvalue_subgraph.cpp @@ -0,0 +1,232 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/read_value.hpp" +#include "transformations/cpu_opset/common/op/read_value_with_subgraph.hpp" + +using namespace testing; +/**************************************************************** + * Pattern 1 (From whisper decoder): + * input input + * | | + * MatMul ReadValueWithSubgraph (MatMul) + * | -> | \ + * ReadValue Result Assign + * | \ + * Result Assign + ****************************************************************/ +static std::shared_ptr constructRVWithSubGraph( + std::shared_ptr input, + const ov::element::Type& type, + std::shared_ptr variable) { + auto mm_weights = std::make_shared(type, ov::Shape{2, 2}, std::vector{1, 2, 3, 4}); + + auto func_input = + std::make_shared(input->get_element_type(), input->get_output_partial_shape(0)); + + auto matmul = std::make_shared(func_input, mm_weights, false, false); + + auto func_output = std::make_shared(matmul); + + auto func = std::make_shared(ov::NodeVector({func_output}), + ov::ParameterVector{func_input}, + "state_init_submodel"); + + auto readvalue = std::make_shared(variable, func); + readvalue->set_input(input->output(0), func_input); + readvalue->set_output(func_output); + readvalue->validate_and_infer_types(); + + return readvalue; +} + +TEST(TransformationTests, ReadValueWithSubgraph_1) { + std::shared_ptr model(nullptr), model_ref(nullptr); + { + const ov::PartialShape shape{1, 1, 2}; + const ov::element::Type type = ov::element::f32; + std::shared_ptr variable = std::make_shared( + ov::op::util::VariableInfo{ov::PartialShape{1, 1, 2}, type, "var_id"}); + + { + auto input = std::make_shared(type, shape); + + auto mm_weights = + std::make_shared(type, ov::Shape{2, 2}, std::vector{1, 2, 3, 4}); + + auto matmul = std::make_shared(input, mm_weights, false, false); + + auto readvalue = std::make_shared(matmul, variable); + + auto assign = std::make_shared(readvalue, variable); + + auto result = std::make_shared(readvalue); + model = std::make_shared(ov::ResultVector{result}, + ov::SinkVector{assign}, + ov::ParameterVector{input}); + + ov::pass::Manager manager; + manager.register_pass(); + manager.run_passes(model); + } + { + auto input = std::make_shared(type, shape); + + auto readvalue = constructRVWithSubGraph(input, type, variable); + + auto assign = std::make_shared(readvalue, variable); + + auto result = std::make_shared(readvalue); + + model_ref = std::make_shared(ov::ResultVector{result}, + ov::SinkVector{assign}, + ov::ParameterVector{input}); + } + auto res = compare_functions(model, model_ref, 0, 0, 0, 0, 0, 0); + 
ASSERT_TRUE(res.first) << res.second; + } +} + +/*************************************************************************************************** + * Pattern 2 (Complex pattern): + * input input + * | | + * Convert Convert + * / | \ / | \ + * / | \ / Add2 \ + * Add1 Add2 \ | | \ | + * | | \ | ---> | | Add3 + * \ | Add3 | | / \ + * \ | / \ ReadValueWithSubgraph Result2 Subgraph(Add1, Add4, Add5) + * \ Add4 \ / \ + * \ | \ Result1 Assign + * Add5 Result2 + * | + * ReadValue + * / \ + * Result1 Assign + * + ***************************************************************************************************/ + +static std::shared_ptr create_const_node(ov::Shape shape) { + return std::make_shared(ov::element::i32, shape, std::vector{1}); +} + +static std::shared_ptr constructRVWithSubGraph2( + ov::NodeVector inputs, + const ov::element::Type& type, + std::shared_ptr variable) { + ov::ParameterVector func_inputs; + for (auto input : inputs) { + auto func_input = + std::make_shared(input->get_element_type(), input->get_output_partial_shape(0)); + func_inputs.push_back(func_input); + } + + auto add1 = std::make_shared(func_inputs[0], create_const_node(ov::Shape{4})); + + auto add4 = std::make_shared(func_inputs[1], func_inputs[2]); + + auto add5 = std::make_shared(add1, add4); + + auto func_output = std::make_shared(add5); + + auto func = std::make_shared(ov::NodeVector({func_output}), func_inputs, "state_init_submodel"); + + auto readvalue = std::make_shared(variable, func); + for (size_t i = 0; i < inputs.size(); i++) { + readvalue->set_input(inputs[i]->output(0), func_inputs[i]); + } + readvalue->set_output(func_output); + readvalue->validate_and_infer_types(); + + return readvalue; +} + +TEST(TransformationTests, ReadValueWithSubgraph_2) { + std::shared_ptr model(nullptr), model_ref(nullptr); + { + const ov::PartialShape shape{1, 2, 4}; + const ov::element::Type in_type = ov::element::f32; + const ov::element::Type out_type = ov::element::i32; + + std::shared_ptr variable = + std::make_shared(ov::op::util::VariableInfo{shape, out_type, "var_id"}); + + { + auto input = std::make_shared(in_type, shape); + input->set_friendly_name("input"); + + auto convert = std::make_shared(input, out_type); + convert->set_friendly_name("convert"); + + auto add1 = std::make_shared(convert, create_const_node(ov::Shape{4})); + add1->set_friendly_name("add1"); + + auto add2 = std::make_shared(convert, create_const_node(ov::Shape{4})); + add2->set_friendly_name("add2"); + + auto add3 = std::make_shared(add2, convert); + add3->set_friendly_name("add3"); + + auto add4 = std::make_shared(add2, add3); + add4->set_friendly_name("add4"); + + auto add5 = std::make_shared(add1, add4); + add5->set_friendly_name("add5"); + + auto readvalue = std::make_shared(add5, variable); + readvalue->set_friendly_name("readvalue"); + + auto assign = std::make_shared(readvalue, variable); + assign->set_friendly_name("assign"); + + auto result1 = std::make_shared(readvalue); + result1->set_friendly_name("result1"); + + auto result2 = std::make_shared(add3); + result2->set_friendly_name("result2"); + + model = std::make_shared(ov::ResultVector{result1, result2}, + ov::SinkVector{assign}, + ov::ParameterVector{input}); + + ov::pass::Manager manager; + manager.register_pass(); + manager.run_passes(model); + } + { + auto input = std::make_shared(in_type, shape); + + auto convert = std::make_shared(input, out_type); + + auto add2 = std::make_shared(convert, create_const_node(ov::Shape{4})); + + auto add3 = std::make_shared(add2, convert); + + 
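// convert, add2 and add3 each also reach result2, so the pass keeps them outside
+            // the body as subgraph inputs, while add1, add4 and add5 are absorbed into it.
+            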
auto readvalue = constructRVWithSubGraph2({convert, add2, add3}, out_type, variable); + + auto assign = std::make_shared(readvalue, variable); + + auto result1 = std::make_shared(readvalue); + + auto result2 = std::make_shared(add3); + + model_ref = std::make_shared(ov::ResultVector{result1, result2}, + ov::SinkVector{assign}, + ov::ParameterVector{input}); + } + auto res = compare_functions(model, model_ref, 0, 0, 0, 0, 0, 0); + ASSERT_TRUE(res.first) << res.second; + } +} \ No newline at end of file