From e567c9ee8603344a63df3dd1ad0308229b8e7ef2 Mon Sep 17 00:00:00 2001 From: Razvan Apetroaie Date: Mon, 12 Aug 2024 13:41:27 +0300 Subject: [PATCH] [NPU] Switching the I/O identification convention to indices (#24248) ### Details: - Please see PR#10348 (compiler repository) for a detailed description and some extra validation. ### Tickets: - *CVS-142751* --- .../duplicate_inputs_outputs_names.cpp | 18 + .../src/al/include/intel_npu/al/icompiler.hpp | 124 +++- .../src/al/include/sync_infer_request.hpp | 113 ++-- .../intel_npu/src/al/src/icompiler.cpp | 67 ++ .../src/al/src/sync_infer_request.cpp | 189 +++--- .../src/backend/include/zero_executor.hpp | 12 +- .../backend/include/zero_infer_request.hpp | 62 +- .../src/backend/include/zero_memory.hpp | 6 +- .../src/backend/include/zero_pipeline.hpp | 13 +- .../src/backend/src/zero_executor.cpp | 18 +- .../src/backend/src/zero_infer_request.cpp | 583 ++++++++---------- .../intel_npu/src/backend/src/zero_init.cpp | 3 +- .../intel_npu/src/backend/src/zero_memory.cpp | 20 +- .../src/backend/src/zero_pipeline.cpp | 161 +++-- .../include/zero_compiler_in_driver.hpp | 66 +- .../compiler/src/driver_compiler_adapter.cpp | 3 +- .../compiler/src/graph_transformations.cpp | 13 +- .../compiler/src/zero_compiler_in_driver.cpp | 300 ++++----- .../intel_npu/src/plugin/src/plugin.cpp | 76 +-- .../duplicate_inputs_outputs_names.cpp | 20 + .../duplicate_inputs_outputs_names.hpp | 15 + .../duplicate_inputs_outputs_names.cpp | 118 ++++ 22 files changed, 1098 insertions(+), 902 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp create mode 100644 src/plugins/intel_npu/src/al/src/icompiler.cpp create mode 100644 src/plugins/intel_npu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp create mode 100644 
src/tests/functional/plugin/shared/include/execution_graph_tests/duplicate_inputs_outputs_names.hpp create mode 100644 src/tests/functional/plugin/shared/src/execution_graph_tests/duplicate_inputs_outputs_names.cpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp new file mode 100644 index 00000000000000..c6d3e14f82e3df --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp @@ -0,0 +1,18 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "execution_graph_tests/duplicate_inputs_outputs_names.hpp" + +#include "common_test_utils/test_constants.hpp" + +using namespace ExecutionGraphTests; + +namespace { + +INSTANTIATE_TEST_SUITE_P(smoke_duplicateInputsOutputsNames, + ExecGraphDuplicateInputsOutputsNames, + ::testing::Values(ov::test::utils::DEVICE_CPU), + ExecGraphDuplicateInputsOutputsNames::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp index 0175949db1ae73..25361c0dc957c5 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -22,48 +23,107 @@ namespace intel_npu { /** - * @brief A helper structure used for storing the metadata found within the I/O nodes. - * @details The "legacyName" attribute holds the name most commonly used as map key for multiple structures. - * This value also corresponds to the identifier used by the OpenVINO 1.0 API. 
- * - * "originalShape" corresponds to the shape registered in the graph, while "transposedShape" holds the shape obtained - * upon applying a transposition corresponding to the legacy layout value. Use the "transposedShape" one if not sure - * which one you need. + * @brief A helper structure used for storing metadata corresponding to one input/output entry. */ -struct IONodeDescriptor { - std::string legacyName; - std::string currentNodeName; +struct IODescriptor { + /** + * @brief The name of the input/output assigned by the compiler. + * @details This value may differ from other name attributes: + * - The compiler could have created additional inputs/outputs (e.g. for representing states). These are not + * found in the original IR model. + * - The compiler may append indices to names in the case where duplicate names are found. + * @note The prefixes introduced by the compiler in order to differentiate the special cases (e.g. states and shape + * tensors) were removed prior to initializing this field. + */ + std::string nameFromCompiler; + + ov::element::Type precision; + + ov::PartialShape shapeFromCompiler; + + /** + * @brief If set to "true", the current object describes a buffer which may be used for altering a state tensor. + * @details This flag is set if the compiler prefixed the name using a "read value" prefix. The state input and + * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. + */ + bool isStateInput = false; + + /** + * @brief If set to "true", the current object describes a buffer which reflects the value of a state tensor. + * @details This flag is set if the compiler prefixed the name using an "assign" prefix. The state input and + * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. + */ + bool isStateOutput = false; + + /** + * @brief If set to "true", the buffer of the tensor described here contains as value the shape of the referenced + * tensor. 
+ * @details This flag is set if the compiler prefixed the name using a "shape" prefix. + * + * The referenced tensor bears the same name ("nameFromCompiler"), but its "isShapeTensor" value is set to + * "false". The two descriptors are also tied using the "relatedDescriptorIndex" attribute. + */ + bool isShapeTensor = false; + + /** + * @brief Points towards a related descriptor. + * @details The related descriptors are defined by (state input, state output) or (dynamic tensor, shape tensor) + * pairs. + */ + std::optional relatedDescriptorIndex; + + /** + * @brief The friendly name of the node extracted from the IR model. + * @details In some cases, this field is required for constructing a dummy model which uses the same input/output + * metadata as the original IR model. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the + * compiler). + */ + std::string nodeFriendlyName; + + /** + * @brief The names of the output tensors extracted from the IR model. + * @details In some cases, this field is required for constructing a dummy model which uses the same input/output + * metadata as the original IR model. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the + * compiler). + */ std::unordered_set outputTensorNames; - ov::element::Type_t precision; - ov::PartialShape originalShape; - ov::PartialShape transposedShape; -}; -/** - * @brief A helper map to represent descriptions for inputs and outputs - * of a network - */ -using IONodeDescriptorMap = std::unordered_map; + /** + * @brief The shape extracted from the IR model. + * @details The values may differ from the ones found in "shapeFromCompiler" if batching is to be handled by the + * plugin. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added + * by the compiler). 
+ */ + std::optional shapeFromIRModel = std::nullopt; +}; struct NetworkMetadata final { std::string name; - std::vector inputNames; - std::vector outputNames; - std::vector stateNames; - std::vector shapeNames; + std::vector inputs; + std::vector outputs; + std::vector profilingOutputs; - IONodeDescriptorMap parameters; - IONodeDescriptorMap results; - IONodeDescriptorMap states; - IONodeDescriptorMap shapes; - IONodeDescriptorMap profilingOutputs; + size_t numStreams = 1; - std::unordered_map inputOrder; - std::unordered_map outputOrder; + /** + * @brief Binds the (state input, state output) and (dynamic tensor, shape tensor) pairs using the + * "relatedDescriptorIndex" attribute. + * @details For state inputs, the "relatedDescriptorIndex" value is set to the index of the output which bears the + * same name. The reverse is also applied. + * + * For shape tensors, the lookup is performed in the same container (inputs or outputs). The value is once again set + * to the index of the entry which bears the same name. + */ + void bindRelatedDescriptors(); - int numStreams = 1; -}; +}; // struct NetworkMetadata /** * @struct NetworkDescription diff --git a/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp b/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp index 7272a67faafff1..bf9e0f20af3b78 100644 --- a/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/al/include/sync_infer_request.hpp @@ -92,56 +92,32 @@ class SyncInferRequest : public ov::IInferRequest { */ void initialize_states(); +protected: /** - * @return The state tensors accessible by their names. - */ - std::unordered_map>& get_variable_states() { - return _variableStates; - } - - /** - * @return The names used by the inputs in the order registered inside the model. - */ - std::vector get_input_names() { - return _metadata.inputNames; - } - - /** - * @return The names used by the outputs in the order registered inside the model. 
- */ - std::vector get_output_names() { - return _metadata.outputNames; - } - - /** - * @return The names used by the state variables in the order registered inside the model. + * @see ov::ISyncInferRequest */ - std::vector get_state_names() { - return _metadata.stateNames; - } + struct FoundPort { + size_t idx; + enum class Type { NOT_FOUND = 0, INPUT, OUTPUT } type; - /** - * @return The names used by the shape variables in the order registered inside the model. - */ - std::vector get_shape_names() { - return _metadata.shapeNames; - } + bool found() { + return type != Type::NOT_FOUND; + } + bool is_input() { + return type == Type::INPUT; + } + bool is_output() { + return !is_input(); + } + }; /** - * @return A map holding references towards all tensors used by the current inference request object. + * @brief Finds input or output port + * @return structure which contains index of Input/Output or report that port wasn't found + * @see ov::ISyncInferRequest */ - std::unordered_map>& get_all_tensors() { - return _allTensors; - } + FoundPort find_port(const ov::Output& port) const; - /** - * @return A map holding references towards all shapes tensors used by the current inference request object. - */ - std::unordered_map>& get_shapes_tensors() { - return _shapesTensors; - } - -protected: /** * @brief Basic checks for input/output tensor * @@ -163,32 +139,19 @@ class SyncInferRequest : public ov::IInferRequest { virtual void check_network_precision(const ov::element::Type_t precision) const = 0; /** - * @brief Indicates a kind of provided tensor. Marks special tensors, used for internal implementation - */ - enum class TensorType { InputOrOutput, Shape, State }; - - /** - * @brief Allocates a tensor on host and stores the reference inside the "_allTensors" attribute. If a buffer - * address is provided, then the tensor is built upon it and no additional data buffer is allocated. 
- * @param tensorName The name by which the tensor shall be identified + * @brief Allocates a tensor on host and stores the reference inside multiple attributes. * @param descriptor Tensor's metadata - * @param isState If true, the tensor shall also be stored inside the state variables map. In this case, adding the - * tensor to this structure would be required in order to correctly answer the state queries. + * @param index The index which the allocated tensor shall use. + * @param isInput Determines the containers in which the newly allocated tensors will be stored. * @param allocator If provided, the tensor uses the custom allocator instead of using the default one. + * @param batchSize If provided, the value of the shape on the 0th axis is overridden with this value. + * @return Pointer towards the allocated tensor */ - void allocate_tensor(std::string tensorName, - const IONodeDescriptor& descriptor, - TensorType tensorType = TensorType::InputOrOutput, - const ov::Allocator& allocator = {}) const; - - // Mutable to return reference to ov::Tensor - mutable std::unordered_map> _allTensors; - mutable std::unordered_map> _shapesTensors; - // A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another - // memory area for the tensor. - mutable std::unordered_map> _copyAllTensors; - - mutable std::unordered_map> _variableStates; + std::shared_ptr allocate_tensor(const IODescriptor& descriptor, + const size_t index, + const bool isInput, + const ov::Allocator& allocator = {}, + const std::optional batchSize = std::nullopt) const; // This is intel_npu::ICompiledModel pointer, but need to use OV base class because // ov::IInferRequest::get_compiled_model returns a refernce to shared_ptr! 
@@ -196,12 +159,20 @@ class SyncInferRequest : public ov::IInferRequest { NetworkMetadata _metadata; - // Stored in order to avoid additional processing when launching inferences - std::vector _inputAndStateInputNames; - std::vector _outputAndStateOutputNames; + mutable std::vector> _userInputTensors; + mutable std::vector> _userOutputTensors; - std::unordered_map _nodeNameToLegacyName; - std::unordered_map _legacyNameToNodeName; + mutable std::vector> _variableStates; + + /** + * @see ov::ISyncInferRequest + */ + mutable std::unordered_map _cachedPorts; + + /** + * @see ov::ISyncInferRequest + */ + mutable std::mutex _cacheMutex; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/al/src/icompiler.cpp b/src/plugins/intel_npu/src/al/src/icompiler.cpp new file mode 100644 index 00000000000000..632a466d17d442 --- /dev/null +++ b/src/plugins/intel_npu/src/al/src/icompiler.cpp @@ -0,0 +1,67 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_npu/al/icompiler.hpp" + +namespace intel_npu { + +void NetworkMetadata::bindRelatedDescriptors() { + size_t ioIndex = 0; + + for (IODescriptor& input : inputs) { + if (input.relatedDescriptorIndex.has_value()) { + ++ioIndex; + continue; + } + + if (input.isStateInput) { + const auto relatedDescriptorIterator = + std::find_if(outputs.begin(), outputs.end(), [&](const IODescriptor& output) { + return output.isStateOutput && (output.nameFromCompiler == input.nameFromCompiler); + }); + + if (relatedDescriptorIterator != outputs.end()) { + input.relatedDescriptorIndex = std::distance(outputs.begin(), relatedDescriptorIterator); + outputs.at(*input.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex; + } + } else if (input.isShapeTensor) { + const auto relatedDescriptorIterator = + std::find_if(inputs.begin(), inputs.end(), [&](const IODescriptor& candidate) { + return !candidate.isShapeTensor && (candidate.nameFromCompiler == input.nameFromCompiler); + }); + 
+ if (relatedDescriptorIterator != inputs.end()) { + input.relatedDescriptorIndex = std::distance(inputs.begin(), relatedDescriptorIterator); + inputs.at(*input.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex; + } + } + + ++ioIndex; + } + + ioIndex = 0; + + for (IODescriptor& output : outputs) { + if (output.relatedDescriptorIndex.has_value()) { + ++ioIndex; + continue; + } + + if (output.isShapeTensor) { + const auto relatedDescriptorIterator = + std::find_if(outputs.begin(), outputs.end(), [&](const IODescriptor& candidate) { + return !candidate.isShapeTensor && (candidate.nameFromCompiler == output.nameFromCompiler); + }); + + if (relatedDescriptorIterator != outputs.end()) { + output.relatedDescriptorIndex = std::distance(outputs.begin(), relatedDescriptorIterator); + outputs.at(*output.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex; + } + } + + ++ioIndex; + } +} + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp index 04b930b7ca63ff..08d5b518b98cad 100644 --- a/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/al/src/sync_infer_request.cpp @@ -8,53 +8,90 @@ #include "openvino/op/util/op_types.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/runtime/plugin_itt.hpp" +#include "openvino/util/common_util.hpp" #include "transformations/utils/utils.hpp" +namespace { + +constexpr size_t BATCH_AXIS = 0; + +} + namespace intel_npu { SyncInferRequest::SyncInferRequest(const std::shared_ptr& compiledModel) : _compiledModel(compiledModel), - _metadata(compiledModel->get_network_metadata()) { + _metadata(compiledModel->get_network_metadata()), + _userInputTensors(_metadata.inputs.size(), nullptr), + _userOutputTensors(_metadata.outputs.size(), nullptr) { OPENVINO_ASSERT(_compiledModel); - const std::vector>& outputs = get_outputs(); - - if (outputs.empty()) { + if (get_outputs().empty()) { 
OPENVINO_THROW("Inference request creation: no output found for network " + _metadata.name); } - // Map the node names to the legacy ones used by the I/O tensors in order to allow an easier access to the tensors' - // contents - for (const auto& [name, resultDescriptor] : _metadata.results) { - _nodeNameToLegacyName[name] = resultDescriptor.legacyName; - _legacyNameToNodeName[resultDescriptor.legacyName] = name; - } - - _inputAndStateInputNames = _metadata.inputNames; - _outputAndStateOutputNames = _metadata.outputNames; - - for (const std::string& stateName : _metadata.stateNames) { - // State variables shall be identified by specific prefixes in order to avoid a potential tensor name collision - _inputAndStateInputNames.push_back(READVALUE_PREFIX + stateName); - _outputAndStateOutputNames.push_back(ASSIGN_PREFIX + stateName); + // Create map of empty tensors and cache ports from the compiled model + // See the ov::ISyncInferRequest constructor + auto portType = SyncInferRequest::FoundPort::Type::INPUT; + for (const auto& ports : {get_inputs(), get_outputs()}) { + for (size_t i = 0; i < ports.size(); i++) { + const auto& port = ports[i]; + size_t portHash = ov::util::hash_combine(std::vector{std::hash()(port.get_node()), + std::hash()(port.get_index())}); + _cachedPorts[portHash] = {i, portType}; + } + portType = SyncInferRequest::FoundPort::Type::OUTPUT; } +} - const auto contains = [](const auto& container, const auto& value) { - return std::find(container.begin(), container.end(), value) != container.end(); +SyncInferRequest::FoundPort SyncInferRequest::find_port(const ov::Output& port) const { + // check if the tensor names of target port is a subset of source port's tensor names + auto check_tensor_names = [](const std::unordered_set& source, + const std::unordered_set& target) { + for (auto const& name : target) { + if (source.find(name) == source.end()) { + return false; + } + } + return true; }; - for (const auto& shapeName : _metadata.shapeNames) { - if 
(contains(_inputAndStateInputNames, shapeName)) { - _inputAndStateInputNames.push_back(SHAPE_TENSOR_PREFIX + shapeName); + // This function is hotspot, need optimization. + auto check_nodes = [](const ov::Node* node1, const ov::Node* node2) { + return node1 == node2 || + (node1->outputs().size() == node2->outputs().size() && + node1->inputs().size() == node2->inputs().size() && node1->get_type_info() == node2->get_type_info() && + node1->get_friendly_name() == node2->get_friendly_name()); + }; + // Find port without caching work slow because we need each time iterate over all ports and compare different + // strings So use WA with caching in order to make 2+ calls for the same ports faster. + // Calculate hash for the port + size_t port_hash = ov::util::hash_combine( + std::vector{std::hash()(port.get_node()), std::hash()(port.get_index())}); + { + std::lock_guard lock(_cacheMutex); + if (_cachedPorts.find(port_hash) != _cachedPorts.end()) { + // Cached port for the hash was found + return _cachedPorts[port_hash]; } - - const auto& shapeNameMatch = _legacyNameToNodeName.find(shapeName); - if (shapeNameMatch != _legacyNameToNodeName.end()) { - if (contains(_outputAndStateOutputNames, shapeNameMatch->second)) { - _outputAndStateOutputNames.push_back(SHAPE_TENSOR_PREFIX + shapeName); + } + SyncInferRequest::FoundPort::Type type = SyncInferRequest::FoundPort::Type::INPUT; + for (const auto& ports : {get_inputs(), get_outputs()}) { + for (size_t i = 0; i < ports.size(); i++) { + // The order of the arguments might matter for the "check_tensor_names" call. If the "CompiledModel" object + // was obtained via "import_model", then the number of tensor names could be cut to 32 due to limitations + // inside the NPU stack. For this particular scenario, we are checking if all tensor names corresponding to + // the "CompiledModel" are found in the provided port instead of doing the opposite. 
+ if (ports[i].get_index() == port.get_index() && check_nodes(ports[i].get_node(), port.get_node()) && + check_tensor_names(port.get_names(), ports[i].get_names())) { + std::lock_guard lock(_cacheMutex); + _cachedPorts[port_hash] = {i, type}; + return _cachedPorts[port_hash]; } } + type = SyncInferRequest::FoundPort::Type::OUTPUT; } + return {0, SyncInferRequest::FoundPort::Type::NOT_FOUND}; } const std::vector>& SyncInferRequest::get_inputs() const { @@ -70,34 +107,41 @@ const std::shared_ptr& SyncInferRequest::get_compiled_ } void SyncInferRequest::initialize_states() { - for (const std::string& stateName : _metadata.stateNames) { - _variableStates.at(stateName)->reset(); + for (const ov::SoPtr& variableState : _variableStates) { + variableState->reset(); } } std::vector> SyncInferRequest::query_state() const { - std::vector> queryResult; - - for (const std::string& stateName : _metadata.stateNames) { - queryResult.push_back(_variableStates.at(stateName)); - } - - return queryResult; + return _variableStates; } ov::SoPtr SyncInferRequest::get_tensor(const ov::Output& port) const { - return _allTensors.at(port.get_node()->get_friendly_name()); + auto foundPort = find_port(port); + OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port); + + if (foundPort.is_input()) { + return _userInputTensors.at(foundPort.idx); + } + return _userOutputTensors.at(foundPort.idx); } void SyncInferRequest::set_tensor(const ov::Output& port, const ov::SoPtr& tensor) { OV_ITT_SCOPED_TASK(ov::itt::domains::Plugin, "set_tensor"); + + auto foundPort = find_port(port); + OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port); try { check_tensor(port, tensor); } catch (const ov::Exception& ex) { OPENVINO_THROW("Failed to set tensor. 
", ex.what()); } - _allTensors[port.get_node()->get_friendly_name()] = tensor._ptr; + if (foundPort.is_input()) { + _userInputTensors.at(foundPort.idx) = tensor._ptr; + } else { + _userOutputTensors.at(foundPort.idx) = tensor._ptr; + } } std::vector> SyncInferRequest::get_tensors(const ov::Output& /*port*/) const { @@ -151,54 +195,59 @@ void SyncInferRequest::check_tensor(const ov::Output& port, void SyncInferRequest::check_tensors() const { const auto& inputs = _compiledModel->inputs(); for (size_t i = 0; i < inputs.size(); i++) { - if (_allTensors.find(inputs[i].get_node()->get_friendly_name()) != _allTensors.end()) { - check_tensor(inputs[i], _allTensors.at(inputs[i].get_node()->get_friendly_name())); + if (_userInputTensors.at(i)) { + check_tensor(inputs[i], _userInputTensors.at(i)); } } const auto& outputs = _compiledModel->outputs(); for (size_t i = 0; i < outputs.size(); i++) { - if (_allTensors.find(outputs[i].get_node()->get_friendly_name()) != _allTensors.end()) { - check_tensor(outputs[i], _allTensors.at(outputs[i].get_node()->get_friendly_name())); + if (_userOutputTensors.at(i)) { + check_tensor(outputs[i], _userOutputTensors.at(i)); } } } -void SyncInferRequest::allocate_tensor(std::string tensorName, - const IONodeDescriptor& descriptor, - TensorType tensorType, - const ov::Allocator& allocator) const { +std::shared_ptr SyncInferRequest::allocate_tensor(const IODescriptor& descriptor, + const size_t index, + const bool isInput, + const ov::Allocator& allocator, + const std::optional batchSize) const { + check_network_precision(descriptor.precision); + std::shared_ptr tensor; + ov::Shape allocatedTensorShape = descriptor.shapeFromCompiler.get_max_shape(); - check_network_precision(descriptor.precision); + if (batchSize.has_value()) { + allocatedTensorShape[BATCH_AXIS] = *batchSize; + } - if (allocator) { - tensor = ov::make_tensor(descriptor.precision, descriptor.transposedShape.get_max_shape(), allocator); + if (descriptor.isStateOutput) { + // Only 
one buffer is required for each (state input, state output) pair, acting as an input before running the + // inference and as an output after performing it. Thus both the "state input" and "state output" entries shall + // point to the same buffer. + OPENVINO_ASSERT(descriptor.relatedDescriptorIndex.has_value(), + "The link between state descriptors is missing, state name: ", + descriptor.nameFromCompiler); + tensor = _userInputTensors.at(*descriptor.relatedDescriptorIndex); + } else if (allocator) { + tensor = ov::make_tensor(descriptor.precision, allocatedTensorShape, allocator); } else { - tensor = ov::make_tensor(descriptor.precision, descriptor.transposedShape.get_max_shape()); + tensor = ov::make_tensor(descriptor.precision, allocatedTensorShape); } - if (tensorType == TensorType::Shape) { - _shapesTensors[tensorName] = tensor; - tensorName = SHAPE_TENSOR_PREFIX + tensorName; - } + if (isInput) { + if (_userInputTensors.at(index) == nullptr) { + _userInputTensors.at(index) = tensor; + } - if (tensorType == TensorType::State) { - _variableStates[tensorName] = std::make_shared(tensorName, tensor); - - // State variables shall be identified by specific prefixes in order to avoid a potential tensor name collision. - // Additionally, only one buffer is required in the whole flow, acting as an input before running the inference - // and as an output after performing it. Thus both the "state input" and "state output" entries shall point to - // the same buffer. 
- _copyAllTensors[READVALUE_PREFIX + tensorName] = std::move(tensor); - _copyAllTensors[ASSIGN_PREFIX + tensorName] = _copyAllTensors[READVALUE_PREFIX + tensorName]; - _allTensors[READVALUE_PREFIX + tensorName] = _copyAllTensors[READVALUE_PREFIX + tensorName]; - _allTensors[ASSIGN_PREFIX + tensorName] = _copyAllTensors[READVALUE_PREFIX + tensorName]; - } else { - _copyAllTensors[tensorName] = std::move(tensor); - if (_allTensors.find(tensorName) == _allTensors.end()) { - _allTensors[tensorName] = _copyAllTensors[tensorName]; + if (descriptor.isStateInput) { + _variableStates.push_back(std::make_shared(descriptor.nameFromCompiler, tensor)); } + } else if (_userOutputTensors.at(index) == nullptr) { + _userOutputTensors.at(index) = tensor; } + + return tensor; } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp index 7ab180f6ced5e4..10b5c66233f369 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp @@ -53,11 +53,11 @@ class ZeroExecutor final : public IExecutor { inline const uint32_t& get_group_ordinal() const { return _group_ordinal; } - inline const std::unordered_map& inputs_desc_map() const { - return _inputs_desc_map; + inline const std::vector& get_input_descriptors() const { + return _input_descriptors; } - inline const std::unordered_map& outputs_desc_map() const { - return _outputs_desc_map; + inline const std::vector& get_output_descriptors() const { + return _output_descriptors; } private: @@ -74,8 +74,8 @@ class ZeroExecutor final : public IExecutor { ze_graph_handle_t _graph = nullptr; ze_graph_properties_t _props{}; - std::unordered_map _inputs_desc_map; - std::unordered_map _outputs_desc_map; + std::vector _input_descriptors; + std::vector _output_descriptors; std::array, stage::COUNT> _command_queues; diff --git 
a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp index 8e9262fd2374c4..f6d15d2c2aed5e 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp @@ -16,10 +16,6 @@ #include "zero_utils.hpp" #include "zero_wrappers.hpp" -namespace { -constexpr std::size_t DEFAULT_BATCH_SIZE = 1; -} // namespace - namespace intel_npu { class ZeroInferRequest final : public SyncInferRequest { @@ -41,21 +37,40 @@ class ZeroInferRequest final : public SyncInferRequest { std::vector get_profiling_info() const override; std::vector get_raw_profiling_data() const; + /** + * @brief Determines if batching can be addressed inside the plugin. In the positive case, the batch size used by + * the model will also be deduced and returned. + * @details Batching can be handled by the plugin only if: + * - The batch axis is the first axis. + * - The batch size received by the compiler takes the default value of 1. + * - The batch size found in the IR model matches for all inputs/outputs and takes a value different than the + * default one. + * + * If any of the previous conditions is not fulfilled, the function will return the default batch size, thus no + * custom algorithm will be applied inside the plugin in order to address batching. + * + * @param metadata Metadata containing the shape values as seen by both the compiler and IR model. These will + * ultimately be used for determining the batch size. + * @returns The batch size deduced by the algorithm or the default value of 1 if batching cannot be performed inside + * the plugin. + */ + std::optional getBatchSize(const NetworkMetadata& metadata); + /** * @brief Check the received tensor and set the Level Zero tensor accordingly * @param tensor Reference to a tensor. - * @param name Friendly name of the tensor. - * @param isParameter True if tensor is a parameter. 
+ * @param index The index corresponding to the position of the tensor inside the I/O structures. + * @param isInput Used for identifying the structures to which the tensor belongs. */ - void set_tensor_data(std::shared_ptr tensor, const std::string& name, bool isParameter); + void set_tensor_data(const std::shared_ptr tensor, const size_t index, const bool isInput); /** * @brief Check the received remote tensor and copy it to the Level Zero tensor * @param tensor Reference to a tensor. - * @param name Friendly name of the tensor. - * @param isParameter True if tensor is a parameter. + * @param index The index corresponding to the position of the tensor inside the I/O structures. + * @param isInput Used for identifying the structures to which the tensor belongs. */ - void set_remote_tensor_data(std::shared_ptr tensor, const std::string& name, bool isParameter); + void set_remote_tensor_data(const std::shared_ptr tensor, const size_t index, const bool isInput); void check_network_precision(const ov::element::Type_t precision) const override; void create_pipeline(); @@ -66,6 +81,14 @@ class ZeroInferRequest final : public SyncInferRequest { const Config _config; Logger _logger; + // A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another + // memory area for the tensor. 
+ mutable std::vector> _levelZeroInputTensors; + mutable std::vector> _levelZeroOutputTensors; + + mutable std::vector> _inputTensorsData; + mutable std::vector> _outputTensorsData; + ze_device_properties_t _properties = {}; std::shared_ptr _inputAllocator; std::shared_ptr _outputAllocator; @@ -74,11 +97,22 @@ class ZeroInferRequest final : public SyncInferRequest { zeroProfiling::ProfilingQuery _profilingQuery; std::shared_ptr _npuProfiling; std::unique_ptr _pipeline; - mutable std::unordered_map _tensorsData; - // If batching is handled on the compiler side then batching on the plugin shall be set to 1, we don't do any - // specific operations on the plugin in this case. - size_t _batchSize = DEFAULT_BATCH_SIZE; + /** + * @brief Indicates how many command lists will be used inside the pipeline. + * @details Leveraging multiple command lists implies distributing the input/output buffers across the batch axis + * between these lists. + * + * If batching is handled on compiler's side then a single command list shall be used, we don't do any + * specific operation inside the plugin in this case. + */ + size_t _numberOfCommandLists = 1; + + /** + * @brief The batch size used by the corresponding model. + * @details The attribute contains a value only if the plugin performs the batches splitting operation. 
+ */ + std::optional _batchSize = std::nullopt; bool _pipelineIsCreated = false; }; diff --git a/src/plugins/intel_npu/src/backend/include/zero_memory.hpp b/src/plugins/intel_npu/src/backend/include/zero_memory.hpp index 93c9252aecd23b..a0110c3c74e4e7 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_memory.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_memory.hpp @@ -96,7 +96,7 @@ class HostMemAllocator final { struct MemoryManagementUnit { MemoryManagementUnit() = default; - void appendArgument(const std::string& name, const std::size_t argSize); + void appendArgument(const std::size_t argSize); void allocate(const ze_device_handle_t device_handle, const ze_context_handle_t context); @@ -104,7 +104,7 @@ struct MemoryManagementUnit { const void* getDeviceMemRegion() const; void* getDeviceMemRegion(); - void* getDevicePtr(const std::string& name); + void* getDevicePtr(const size_t index); bool checkHostPtr(const void* ptr) const; @@ -112,7 +112,7 @@ struct MemoryManagementUnit { std::size_t _size = 0; std::unique_ptr _device; - std::map _offsets; + std::vector _offsets; static const std::size_t alignment = STANDARD_PAGE_SIZE; }; diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index b8724dcdd53f73..ad946579f11c84 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -27,11 +27,11 @@ struct Pipeline { Pipeline& operator=(Pipeline&&) = delete; virtual ~Pipeline() = default; - virtual void push(size_t batch_index) = 0; - virtual void pull(size_t batch_index) = 0; - virtual void reset(size_t batch_index) const = 0; + virtual void push() = 0; + virtual void pull() = 0; + virtual void reset() const = 0; - virtual void updateCommandList(const TensorData& tensors_data, uint32_t index, size_t batch_size) = 0; + virtual void updateCommandList(const TensorData& tensorsData, const 
uint32_t index) = 0; protected: zeroMemory::MemoryManagementUnit _deviceInputs; @@ -43,6 +43,7 @@ std::unique_ptr makePipeline(const std::shared_ptr& e zeroProfiling::ProfilingPool& profiling_pool, zeroProfiling::ProfilingQuery& profiling_query, std::shared_ptr npu_profiling, - std::unordered_map& tensors_data, - const size_t batch_size); + const std::vector>& inputTensorsData, + const std::vector>& outputTensorsData, + const size_t numberOfCommandLists); } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp index 16e410c35ed382..194ce7024ac6a4 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp @@ -95,22 +95,10 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr& i zeroUtils::throwOnFail("pfnGetArgumentProperties3", _graph_ddi_table_ext->pfnGetArgumentProperties3(_graph, index, &arg3)); - if (ZE_GRAPH_ARGUMENT_TYPE_INPUT == arg3.type) { - if (isStateInputName(arg3.name) || isShapeTensorName(arg3.name)) { - _inputs_desc_map.emplace(std::make_pair(std::string(arg3.name), ArgumentDescriptor{arg3, index})); - - } else { - _inputs_desc_map.emplace( - std::make_pair(std::string(arg3.debug_friendly_name), ArgumentDescriptor{arg3, index})); - } + if (arg3.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT) { + _input_descriptors.push_back(ArgumentDescriptor{arg3, index}); } else { - if (isStateOutputName(arg3.name) || isShapeTensorName(arg3.name)) { - _outputs_desc_map.emplace(std::make_pair(std::string(arg3.name), ArgumentDescriptor{arg3, index})); - - } else { - _outputs_desc_map.emplace( - std::make_pair(std::string(arg3.debug_friendly_name), ArgumentDescriptor{arg3, index})); - } + _output_descriptors.push_back(ArgumentDescriptor{arg3, index}); } } diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index 
7c36033568591a..27c1aac7eeeff5 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -20,142 +20,124 @@ using namespace intel_npu; namespace { constexpr std::size_t BATCH_AXIS = 0; +constexpr std::size_t DEFAULT_BATCH_SIZE = 1; +constexpr bool INPUT = true; +constexpr bool OUTPUT = false; /** * @brief Checks that the metadata of the provided descriptor corresponds to the values registered in the Level Zero * structure. - * @param nodeDescriptor The OpenVINO API specific I/O descriptor which shall be compared. + * @param ioDescriptor The OpenVINO API specific I/O descriptor which shall be compared. * @param zeDescriptor The Level Zero specific structure used for comparison. - * @param name Tensor identifier used for error logging. */ -void checkLevelZeroAttributesMatch(const IONodeDescriptor& nodeDescriptor, - const ZeroExecutor::ArgumentDescriptor& zeDescriptor, - const std::string& name) { - const ov::element::Type_t ovPrecision = nodeDescriptor.precision; - const ze_graph_argument_precision_t zePrecision = zeDescriptor.info.devicePrecision; - - if (zeroUtils::getZePrecision(ovPrecision) != zePrecision) { - OPENVINO_THROW("Precision mismatch for parameter " + name); - } - - const std::vector& ovDimensions = nodeDescriptor.transposedShape.get_max_shape(); - - if (ovDimensions.size() > ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE) { - OPENVINO_THROW( - "Maximum number of dimensions supported: " + std::to_string(ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE) + '\n' + - "Given: " + std::to_string(ovDimensions.size())); +void checkLevelZeroAttributesMatch(const IODescriptor& ioDescriptor, + const ZeroExecutor::ArgumentDescriptor& zeDescriptor) { + std::string zeDescriptorName = zeDescriptor.info.name; + + if (isStateInputName(zeDescriptorName)) { + zeDescriptorName = zeDescriptorName.substr(READVALUE_PREFIX.length()); + } else if (isStateOutputName(zeDescriptorName)) { + zeDescriptorName = 
zeDescriptorName.substr(ASSIGN_PREFIX.length()); + } else if (isShapeTensorName(zeDescriptorName)) { + zeDescriptorName = zeDescriptorName.substr(SHAPE_TENSOR_PREFIX.length()); + } + + OPENVINO_ASSERT(ioDescriptor.nameFromCompiler == zeDescriptorName, + "Name mismatch between the I/O structure used internally and its Level Zero correspondent: ", + ioDescriptor.nameFromCompiler, + " vs. ", + zeDescriptorName, + ". The I/O order may have been altered, which could lead to an erroneous behavior."); + OPENVINO_ASSERT(zeroUtils::getZePrecision(ioDescriptor.precision) == zeDescriptor.info.devicePrecision, + "Precision mismatch for input/output named " + ioDescriptor.nameFromCompiler); + + const std::vector& ovDimensions = ioDescriptor.shapeFromCompiler.get_max_shape(); + OPENVINO_ASSERT(ovDimensions.size() <= ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE, + "Maximum number of dimensions supported: " + std::to_string(ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE) + + '\n' + "Given: " + std::to_string(ovDimensions.size())); + + for (size_t index = 0; index < ovDimensions.size(); ++index) { + OPENVINO_ASSERT( + ioDescriptor.shapeFromCompiler.is_dynamic() || ovDimensions[index] == zeDescriptor.info.dims[index], + "Shape mismatch for input/output named " + ioDescriptor.nameFromCompiler); } - for (size_t index = ovDimensions.size(); index < ZE_MAX_GRAPH_ARGUMENT_DIMENSIONS_SIZE; ++index) { - if (zeDescriptor.info.dims[index] != 0 && zeDescriptor.info.dims[index] != 1) { - OPENVINO_THROW("Shape mismatch for parameter " + name); - } + OPENVINO_ASSERT(zeDescriptor.info.dims[index] == 0 || zeDescriptor.info.dims[index] == 1, + "Shape mismatch for input/output named " + ioDescriptor.nameFromCompiler); } +} - for (size_t index = 1; index < ovDimensions.size(); ++index) { - if (ovDimensions[index] != zeDescriptor.info.dims[index] && !nodeDescriptor.transposedShape.is_dynamic()) { - OPENVINO_THROW("Shape mismatch for parameter " + name); - } +template +Type extract_object(const ov::AnyMap& params, 
const ov::Property& p) { + auto itrHandle = params.find(p.name()); + ov::Any res = nullptr; + if (itrHandle == params.end()) { + OPENVINO_THROW("No parameter ", p.name(), " found in parameters map"); } + res = itrHandle->second; + return res.as(); } -std::optional getBatchSizeForNode(const IONodeDescriptor& nodeDescriptor, - const ZeroExecutor::ArgumentDescriptor& zeDescriptor) { - Logger logger("GetBatchSizeForNode", Logger::global().level()); +} // namespace - if (nodeDescriptor.originalShape.rank().get_length() == 0) { - logger.warning("Networks with empty shapes are not supported when batching is handled by the plugin"); +std::optional ZeroInferRequest::getBatchSize(const NetworkMetadata& metadata) { + if (!metadata.outputs.at(0).shapeFromIRModel.has_value()) { + _logger.warning("Batching on the plugin is not used, batching is handled by the compiler"); return std::nullopt; } - if (nodeDescriptor.originalShape.is_dynamic()) { - logger.warning("Dynamic networks are not supported when batching is handled by the plugin"); + const ov::PartialShape& firstOutputShape = *metadata.outputs.at(0).shapeFromIRModel; + if (firstOutputShape.is_dynamic()) { + _logger.warning("Networks using dynamic shapes are not supported when batching is handled by the plugin"); return std::nullopt; } - - const std::vector& ovDimensions = nodeDescriptor.originalShape.get_shape(); - - if (ovDimensions[BATCH_AXIS] == zeDescriptor.info.dims[BATCH_AXIS] && - ovDimensions[BATCH_AXIS] != DEFAULT_BATCH_SIZE) { - logger.info("Batching on the plugin is not used, batching is handled by the compiler"); + if (firstOutputShape.rank().get_length() == 0) { + _logger.warning( + "Networks using rank 0 shapes for inputs/outputs are not supported when batching is handled by the plugin"); return std::nullopt; } - if (zeDescriptor.info.dims[BATCH_AXIS] == DEFAULT_BATCH_SIZE) { - return ovDimensions[BATCH_AXIS]; + const size_t candidateBatchSize = firstOutputShape[BATCH_AXIS].get_length(); + if 
(candidateBatchSize == 0 || candidateBatchSize == DEFAULT_BATCH_SIZE) { + _logger.warning("Batching on the plugin is not used, batching is handled by the compiler"); + return std::nullopt; } - return DEFAULT_BATCH_SIZE; -} - -/** - * @brief Get the batch size to be handled on the plugin. - * @details Analyze the shape from the compiled model with the shape from the originalShape and get the originalShape if - * it is different. - * @param metadata A map to represent descriptions for inputs and outputs of a network. - * @param executorInputDescriptors A map to represent Level zero inputs descriptors. - * @param executorOutputDescriptors A map to represent Level zero outputs descriptors. - */ - -std::optional getBatchSize( - const NetworkMetadata& metadata, - const std::unordered_map& executorInputDescriptors, - const std::unordered_map& executorOutputDescriptors) { - std::set batch_size; - - Logger logger("getBatchSize", Logger::global().level()); + auto checkDescriptorsUseCandidateBatchSize = [candidateBatchSize](const std::vector& descriptors) { + for (const IODescriptor& descriptor : descriptors) { + OPENVINO_ASSERT(descriptor.shapeFromIRModel.has_value(), + "Missing value for the \"shapeFromIRModel\" attribute, I/O descriptor"); - for (const std::string& inputName : metadata.inputNames) { - auto batchSizeForNode = - getBatchSizeForNode(metadata.parameters.at(inputName), executorInputDescriptors.at(inputName)); + const ov::PartialShape& shapeFromCompiler = descriptor.shapeFromCompiler; + const ov::PartialShape& shapeFromIRModel = *descriptor.shapeFromIRModel; - if (batchSizeForNode.has_value()) { - batch_size.insert(*batchSizeForNode); - } else { - return std::nullopt; - } - } + if (shapeFromCompiler.is_dynamic() || shapeFromCompiler.rank().get_length() == 0 || + *shapeFromCompiler.begin() != DEFAULT_BATCH_SIZE) { + return false; + } - for (const std::string& outputName : metadata.outputNames) { - if (!executorOutputDescriptors.count(outputName)) { - 
OPENVINO_THROW("Invalid graph output descriptor key: " + outputName); + if (!descriptor.isStateInput && !descriptor.isStateOutput && !descriptor.isShapeTensor) { + if (shapeFromIRModel.is_dynamic() || shapeFromIRModel.rank().get_length() == 0 || + *shapeFromIRModel.begin() != candidateBatchSize) { + return false; + } + } } - auto batchSizeForNode = - getBatchSizeForNode(metadata.results.at(outputName), executorOutputDescriptors.at(outputName)); - if (batchSizeForNode.has_value()) { - batch_size.insert(*batchSizeForNode); - } else { - return std::nullopt; - } - } + return true; + }; - if (batch_size.size() != 1) { - logger.info("Batching works only when we have the same batch size for all tensors!"); + if (!checkDescriptorsUseCandidateBatchSize(metadata.inputs) || + !checkDescriptorsUseCandidateBatchSize(metadata.outputs)) { + _logger.warning("Batching on the plugin is not used, batching is handled by the compiler"); return std::nullopt; } - auto it = batch_size.begin(); - if (*it) { - return *it; - } + _logger.warning("Batching is handled by the plugin"); - return std::nullopt; + return candidateBatchSize; } -template -Type extract_object(const ov::AnyMap& params, const ov::Property& p) { - auto itrHandle = params.find(p.name()); - ov::Any res = nullptr; - if (itrHandle == params.end()) { - OPENVINO_THROW("No parameter ", p.name(), " found in parameters map"); - } - res = itrHandle->second; - return res.as(); -} - -} // namespace - //------------------------------------------------------------------------------ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& initStructs, const std::shared_ptr& compiledModel, @@ -167,15 +149,18 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& _executor(static_cast(_executorPtr.get())), _config(config), _logger("ZeroInferRequest", config.get()), + _levelZeroInputTensors(_metadata.inputs.size(), nullptr), + _levelZeroOutputTensors(_metadata.outputs.size(), nullptr), + _inputTensorsData(_metadata.inputs.size(), 
std::nullopt), + _outputTensorsData(_metadata.outputs.size(), std::nullopt), _profilingPool(_executor->graph(), zeroProfiling::POOL_SIZE, _executor->getInitStructs()->getProfilingDdiTable()), _profilingQuery(0, _executor->getInitStructs()->getDevice(), _executor->getInitStructs()->getProfilingDdiTable()) { _logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest"); - const std::unordered_map& executorInputDescriptors = - _executor->inputs_desc_map(); - const std::unordered_map& executorOutputDescriptors = - _executor->outputs_desc_map(); + const std::vector& executorInputDescriptors = _executor->get_input_descriptors(); + const std::vector& executorOutputDescriptors = + _executor->get_output_descriptors(); auto proftype = config.get(); if (proftype == ov::intel_npu::ProfilingType::INFER) { @@ -189,10 +174,6 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& zeroUtils::throwOnFail("zeDeviceGetProperties", zeDeviceGetProperties(_executor->getInitStructs()->getDevice(), &_properties)); - const auto contains = [](const auto& container, const auto& value) { - return std::find(container.begin(), container.end(), value) != container.end(); - }; - _outputAllocator = std::make_shared(_initStructs); _inputAllocator = (_properties.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) @@ -200,145 +181,103 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED) : _outputAllocator; - _logger.debug("ZeroInferRequest::ZeroInferRequest - performing I/O buffer allocation using Level Zero API"); - for (const std::string& inputName : _metadata.inputNames) { - if (!executorInputDescriptors.count(inputName)) { - OPENVINO_THROW("Invalid graph input descriptor key: " + inputName); - } + if (config.get() != ov::intel_npu::BatchMode::COMPILER) { + _batchSize = getBatchSize(_metadata); } - - for (const std::string& outputName : _metadata.outputNames) { - if (!executorOutputDescriptors.count(outputName)) { - OPENVINO_THROW("Invalid 
graph output descriptor key: " + outputName); - } + if (_batchSize.has_value()) { + _numberOfCommandLists = *_batchSize; } - if (config.get() != ov::intel_npu::BatchMode::COMPILER) { - auto batchSize = getBatchSize(_metadata, executorInputDescriptors, executorOutputDescriptors); + _logger.debug("ZeroInferRequest::ZeroInferRequest - checking level zero attributes and allocating tensors"); - if (batchSize.has_value()) { - _batchSize = *batchSize; - } - } - - for (const std::string& inputName : _metadata.inputNames) { - IONodeDescriptor& parameterDescriptor = _metadata.parameters.at(inputName); - checkLevelZeroAttributesMatch(parameterDescriptor, executorInputDescriptors.at(inputName), inputName); + size_t ioIndex = 0; + for (const IODescriptor& inputDescriptor : _metadata.inputs) { + checkLevelZeroAttributesMatch(inputDescriptor, executorInputDescriptors.at(ioIndex)); - // When batching is handled by the plugin we need to modify transposed shape with the original batch size since - // it will be forced to 1 at the compilation time - if (_batchSize > DEFAULT_BATCH_SIZE) { - parameterDescriptor.transposedShape[BATCH_AXIS] = _batchSize; + if (!(inputDescriptor.isStateInput || inputDescriptor.isShapeTensor)) { + ++ioIndex; + continue; } - if (contains(_metadata.shapeNames, inputName)) { - const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + inputName; - const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(inputName); + _levelZeroInputTensors.at(ioIndex) = + allocate_tensor(inputDescriptor, ioIndex, INPUT, *_inputAllocator, _batchSize); + _inputTensorsData.at(ioIndex) = + TensorData{_levelZeroInputTensors.at(ioIndex)->data(), _levelZeroInputTensors.at(ioIndex)->get_byte_size()}; - checkLevelZeroAttributesMatch(shapeDescriptor, - executorInputDescriptors.at(shapeBufferName), - shapeBufferName); - - allocate_tensor(inputName, shapeDescriptor, TensorType::Shape, *_inputAllocator); - _tensorsData[shapeBufferName] = 
TensorData{_copyAllTensors.at(shapeBufferName)->data(), - _copyAllTensors.at(shapeBufferName)->get_byte_size()}; - } + ++ioIndex; } - for (const std::string& outputName : _metadata.outputNames) { - IONodeDescriptor& resultDescriptor = _metadata.results.at(outputName); - checkLevelZeroAttributesMatch(resultDescriptor, executorOutputDescriptors.at(outputName), outputName); + ioIndex = 0; + for (const IODescriptor& outputDescriptor : _metadata.outputs) { + checkLevelZeroAttributesMatch(outputDescriptor, executorOutputDescriptors.at(ioIndex)); - // When batching is handled by the plugin we need to modify transposed shape with the original batch size since - // it will be forced to 1 at the compilation time - if (_batchSize > DEFAULT_BATCH_SIZE) { - resultDescriptor.transposedShape[BATCH_AXIS] = _batchSize; + if (!(outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor)) { + ++ioIndex; + continue; } - const auto& shapeNameMatch = _nodeNameToLegacyName.find(outputName); - if (shapeNameMatch != _nodeNameToLegacyName.end()) { - if (contains(_metadata.shapeNames, shapeNameMatch->second)) { - const std::string shapeBufferName = SHAPE_TENSOR_PREFIX + shapeNameMatch->second; - const IONodeDescriptor& shapeDescriptor = _metadata.shapes.at(shapeNameMatch->second); + _levelZeroOutputTensors.at(ioIndex) = + allocate_tensor(outputDescriptor, ioIndex, OUTPUT, *_outputAllocator, _batchSize); + _outputTensorsData.at(ioIndex) = + std::optional(TensorData{_levelZeroOutputTensors.at(ioIndex)->data(), + _levelZeroOutputTensors.at(ioIndex)->get_byte_size()}); - checkLevelZeroAttributesMatch(shapeDescriptor, - executorOutputDescriptors.at(shapeBufferName), - shapeBufferName); - - allocate_tensor(shapeNameMatch->second, shapeDescriptor, TensorType::Shape, *_outputAllocator); - _tensorsData[shapeBufferName] = TensorData{_copyAllTensors.at(shapeBufferName)->data(), - _copyAllTensors.at(shapeBufferName)->get_byte_size()}; - } - } + ++ioIndex; } - for (const std::string& stateName : 
_metadata.stateNames) { - const std::string& stateInputBufferName = READVALUE_PREFIX + stateName; - const std::string& stateOutputBufferName = ASSIGN_PREFIX + stateName; - - if (!executorInputDescriptors.count(stateInputBufferName)) { - OPENVINO_THROW("Invalid graph input descriptor key: " + stateInputBufferName); - } - if (!executorOutputDescriptors.count(stateOutputBufferName)) { - OPENVINO_THROW("Invalid graph output descriptor key: " + stateOutputBufferName); - } - - const IONodeDescriptor& stateDescriptor = _metadata.states.at(stateName); - checkLevelZeroAttributesMatch(stateDescriptor, - executorInputDescriptors.at(stateInputBufferName), - stateInputBufferName); - checkLevelZeroAttributesMatch(stateDescriptor, - executorOutputDescriptors.at(stateOutputBufferName), - stateOutputBufferName); - - // Only one buffer per state variable is required, we'll use the "output" one since this one captures the latest - // tensor value - allocate_tensor(stateName, stateDescriptor, TensorType::State, *_outputAllocator); - _tensorsData[stateInputBufferName] = TensorData{_copyAllTensors.at(stateInputBufferName)->data(), - _copyAllTensors.at(stateInputBufferName)->get_byte_size()}; - _tensorsData[stateOutputBufferName] = TensorData{_copyAllTensors.at(stateOutputBufferName)->data(), - _copyAllTensors.at(stateOutputBufferName)->get_byte_size()}; - } + _logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest completed"); } void ZeroInferRequest::create_pipeline() { - for (const std::string& inputName : _metadata.inputNames) { - if (_copyAllTensors.find(inputName) != _copyAllTensors.end()) { - _logger.debug("ZeroInferRequest::create_pipeline - tensor %s was already allocated", inputName.c_str()); + for (size_t inputIndex = 0; inputIndex < _metadata.inputs.size(); ++inputIndex) { + if (_levelZeroInputTensors.at(inputIndex)) { + _logger.debug("ZeroInferRequest::create_pipeline - tensor %s was already allocated", + 
_metadata.inputs.at(inputIndex).nodeFriendlyName.c_str()); continue; } - IONodeDescriptor& parameterDescriptor = _metadata.parameters.at(inputName); - _logger.debug("ZeroInferRequest::create_pipeline - Allocate new tensor"); - allocate_tensor(inputName, parameterDescriptor, TensorType::InputOrOutput, *_inputAllocator); - _tensorsData[inputName] = - TensorData{_copyAllTensors.at(inputName)->data(), _copyAllTensors.at(inputName)->get_byte_size()}; + _levelZeroInputTensors.at(inputIndex) = + allocate_tensor(_metadata.inputs.at(inputIndex), inputIndex, INPUT, *_inputAllocator, _batchSize); + _inputTensorsData.at(inputIndex) = + std::optional(TensorData{_levelZeroInputTensors.at(inputIndex)->data(), + _levelZeroInputTensors.at(inputIndex)->get_byte_size()}); } - for (const std::string& outputName : _metadata.outputNames) { - if (_copyAllTensors.find(outputName) != _copyAllTensors.end()) { - _logger.debug("ZeroInferRequest::create_pipeline - tensor %s was already allocated", outputName.c_str()); + for (size_t outputIndex = 0; outputIndex < _metadata.outputs.size(); ++outputIndex) { + if (_levelZeroOutputTensors.at(outputIndex)) { + _logger.debug("ZeroInferRequest::create_pipeline - tensor %s was already allocated", + _metadata.outputs.at(outputIndex).nodeFriendlyName.c_str()); continue; } - - IONodeDescriptor& resultDescriptor = _metadata.results.at(outputName); - _logger.debug("ZeroInferRequest::create_pipeline - allocate new tensor"); - allocate_tensor(outputName, resultDescriptor, TensorType::InputOrOutput, *_outputAllocator); - _tensorsData[outputName] = - TensorData{_copyAllTensors.at(outputName)->data(), _copyAllTensors.at(outputName)->get_byte_size()}; + _levelZeroOutputTensors.at(outputIndex) = + allocate_tensor(_metadata.outputs.at(outputIndex), outputIndex, OUTPUT, *_outputAllocator, _batchSize); + _outputTensorsData.at(outputIndex) = + std::optional(TensorData{_levelZeroOutputTensors.at(outputIndex)->data(), + 
_levelZeroOutputTensors.at(outputIndex)->get_byte_size()}); } _logger.debug("ZeroInferRequest::create_pipeline - constructing pipeline"); // Construct pipeline - _pipeline = - makePipeline(_executorPtr, _config, _profilingPool, _profilingQuery, _npuProfiling, _tensorsData, _batchSize); + _pipeline = makePipeline(_executorPtr, + _config, + _profilingPool, + _profilingQuery, + _npuProfiling, + _inputTensorsData, + _outputTensorsData, + _numberOfCommandLists); _logger.debug("ZeroInferRequest::create_pipeline - SyncInferRequest completed"); } -void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, const std::string& name, bool isParameter) { +void ZeroInferRequest::set_tensor_data(const std::shared_ptr tensor, + const size_t index, + const bool isInput) { OV_ITT_TASK_CHAIN(ZERO_SET_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_tensor_data"); + auto& levelZeroTensors = isInput ? _levelZeroInputTensors : _levelZeroOutputTensors; + auto& tensorsData = isInput ? _inputTensorsData : _outputTensorsData; + bool setTensorData = false; bool levelZeroTensorCreatedLocally = true; @@ -353,7 +292,7 @@ void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, cons case ZE_MEMORY_TYPE_DEVICE: case ZE_MEMORY_TYPE_SHARED: _logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context"); - _copyAllTensors[name] = tensor; + levelZeroTensors.at(index) = tensor; levelZeroTensorCreatedLocally = false; setTensorData = true; break; @@ -365,16 +304,18 @@ void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, cons } if (!setTensorData) { - // make sure that the L0 tensor was allocated locally and is not received from the user when receiving random - // tensor - if ((_tensorsData.find(name) != _tensorsData.end()) && !_tensorsData.at(name).levelZeroTensorCreatedLocally) { + // make sure that the L0 tensor was allocated locally and is not received from the user when receiving + // random tensor + if (tensorsData.at(index).has_value() 
&& !tensorsData.at(index)->levelZeroTensorCreatedLocally) { _logger.debug("ZeroInferRequest::set_tensor_data - create locally L0 tensor"); OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "allocate tensor"); - allocate_tensor(name, - isParameter ? _metadata.parameters.at(name) : _metadata.results.at(name), - TensorType::InputOrOutput, - isParameter ? *_inputAllocator : *_outputAllocator); + levelZeroTensors.at(index) = + allocate_tensor(isInput ? _metadata.inputs.at(index) : _metadata.outputs.at(index), + index, + isInput, + isInput ? *_inputAllocator : *_outputAllocator, + _batchSize); setTensorData = true; levelZeroTensorCreatedLocally = true; @@ -382,29 +323,24 @@ void ZeroInferRequest::set_tensor_data(std::shared_ptr tensor, cons } if (setTensorData) { - _tensorsData[name] = TensorData{_copyAllTensors.at(name)->data(), - _copyAllTensors.at(name)->get_byte_size(), - levelZeroTensorCreatedLocally}; + tensorsData.at(index) = std::optional(TensorData{levelZeroTensors.at(index)->data(), + levelZeroTensors.at(index)->get_byte_size(), + levelZeroTensorCreatedLocally}); if (_pipelineIsCreated) { _logger.debug("ZeroInferRequest::infer_async - update command list"); - intel_npu::ZeroExecutor::ArgumentDescriptor desc; - if (isParameter) { - desc = _executor->inputs_desc_map().at(name); - } else { - desc = _executor->outputs_desc_map().at(name); - } - OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList"); - _pipeline->updateCommandList(_tensorsData[name], desc.idx, _batchSize); + _pipeline->updateCommandList(*tensorsData.at(index), + isInput ? 
_executor->get_input_descriptors().at(index).idx + : _executor->get_output_descriptors().at(index).idx); } } } -void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr tensor, - const std::string& name, - bool isParameter) { +void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr tensor, + const size_t index, + const bool isInput) { OV_ITT_TASK_CHAIN(ZERO_SET_REMOTE_TENSOR, itt::domains::LevelZeroBackend, "set_tensor", "set_remote_tensor_data"); auto l0_context = reinterpret_cast( @@ -418,72 +354,82 @@ void ZeroInferRequest::set_remote_tensor_data(std::shared_ptr OPENVINO_THROW("Empty buffer"); } - _copyAllTensors[name] = tensor; - _tensorsData[name] = TensorData{data, tensor->get_byte_size(), false}; + auto& levelZeroTensors = isInput ? _levelZeroInputTensors : _levelZeroOutputTensors; + auto& tensorsData = isInput ? _inputTensorsData : _outputTensorsData; + + levelZeroTensors.at(index) = tensor; + tensorsData.at(index) = std::optional(TensorData{data, tensor->get_byte_size(), false}); if (_pipelineIsCreated) { _logger.debug("ZeroInferRequest::infer_async - update command list"); - intel_npu::ZeroExecutor::ArgumentDescriptor desc; - if (isParameter) { - desc = _executor->inputs_desc_map().at(name); - } else { - desc = _executor->outputs_desc_map().at(name); - } - OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList"); - _pipeline->updateCommandList(_tensorsData[name], desc.idx, _batchSize); + _pipeline->updateCommandList(*tensorsData.at(index), + isInput ? _executor->get_input_descriptors().at(index).idx + : _executor->get_output_descriptors().at(index).idx); } } void ZeroInferRequest::set_tensor(const ov::Output& port, const ov::SoPtr& tensor) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "set_tensor"); + + auto foundPort = find_port(port); + OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port); try { check_tensor(port, tensor); } catch (const ov::Exception& ex) { OPENVINO_THROW("Failed to set tensor. 
", ex.what()); } - _allTensors[port.get_node()->get_friendly_name()] = tensor._ptr; + if (foundPort.is_input()) { + _userInputTensors.at(foundPort.idx) = tensor._ptr; + } else { + _userOutputTensors.at(foundPort.idx) = tensor._ptr; + } if (_initStructs->getMutableCommandListVersion()) { auto remoteTensor = std::dynamic_pointer_cast(tensor._ptr); if (remoteTensor == nullptr) { _logger.debug("ZeroInferRequest::set_tensor - set new tensor"); - set_tensor_data(tensor._ptr, - port.get_node()->get_friendly_name(), - ov::op::util::is_parameter(port.get_node())); + set_tensor_data(tensor._ptr, foundPort.idx, foundPort.is_input()); } else { _logger.debug("ZeroInferRequest::set_tensor - set new remote tensor"); - set_remote_tensor_data(remoteTensor, - port.get_node()->get_friendly_name(), - ov::op::util::is_parameter(port.get_node())); + set_remote_tensor_data(remoteTensor, foundPort.idx, foundPort.is_input()); } } } ov::SoPtr ZeroInferRequest::get_tensor(const ov::Output& port) const { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "get_tensor"); - const std::string& nodeFriendlyName = port.get_node()->get_friendly_name(); - if (_allTensors.find(nodeFriendlyName) != _allTensors.end()) { + auto foundPort = find_port(port); + OPENVINO_ASSERT(foundPort.found(), "Cannot find tensor for port ", port); + + const size_t ioIndex = foundPort.idx; + const bool isInput = foundPort.is_input(); + auto& userTensors = isInput ? _userInputTensors : _userOutputTensors; + + if (userTensors.at(ioIndex)) { _logger.debug("ZeroInferRequest::get_tensor - tensor allocated, get the tensor"); - return _allTensors.at(nodeFriendlyName); + return userTensors.at(ioIndex); } _logger.debug("ZeroInferRequest::get_tensor - tensor is not allocated, create the tensor"); - const bool isParameter = ov::op::util::is_parameter(port.get_node()); - allocate_tensor(nodeFriendlyName, - isParameter ? 
_metadata.parameters.at(nodeFriendlyName) : _metadata.results.at(nodeFriendlyName), - TensorType::InputOrOutput, - isParameter ? *_inputAllocator : *_outputAllocator); + auto& levelZeroTensors = isInput ? _levelZeroInputTensors : _levelZeroOutputTensors; + auto& tensorsData = isInput ? _inputTensorsData : _outputTensorsData; - _tensorsData[nodeFriendlyName] = - TensorData{_copyAllTensors.at(nodeFriendlyName)->data(), _copyAllTensors.at(nodeFriendlyName)->get_byte_size()}; + levelZeroTensors.at(ioIndex) = + allocate_tensor(isInput ? _metadata.inputs.at(ioIndex) : _metadata.outputs.at(ioIndex), + ioIndex, + isInput, + isInput ? *_inputAllocator : *_outputAllocator, + _batchSize); + tensorsData.at(ioIndex) = + std::optional(TensorData{levelZeroTensors.at(ioIndex)->data(), levelZeroTensors.at(ioIndex)->get_byte_size()}); - return _allTensors.at(nodeFriendlyName); + return levelZeroTensors.at(ioIndex); } void ZeroInferRequest::infer() { @@ -504,102 +450,99 @@ void ZeroInferRequest::infer_async() { } _executor->mutexUnlock(); - for (const std::string& name : _inputAndStateInputNames) { - auto& inputTensor = _allTensors.at(name); - - if (isShapeTensorName(name)) { - const auto actualTensorName = name.substr(SHAPE_TENSOR_PREFIX.size()); - const auto& inputDims = _allTensors.at(actualTensorName)->get_shape(); + size_t inputIndex = 0; + for (const std::shared_ptr& userTensor : _userInputTensors) { + const IODescriptor inputDescriptor = _metadata.inputs.at(inputIndex); + if (inputDescriptor.isShapeTensor) { + OPENVINO_ASSERT(inputDescriptor.relatedDescriptorIndex.has_value(), + "The link between the dynamic tensor and its shape tensor is missing, entry name: ", + inputDescriptor.nameFromCompiler); + const auto& inputDims = _userInputTensors.at(*inputDescriptor.relatedDescriptorIndex)->get_shape(); - for (size_t i = 0; i < inputTensor->get_size(); ++i) { + for (size_t i = 0; i < userTensor->get_size(); ++i) { const auto reverseIdx = inputDims.size() - 1 - i; - 
inputTensor->data()[i] = static_cast(inputDims[reverseIdx]); + userTensor->data()[i] = static_cast(inputDims[reverseIdx]); } } - auto remoteTensor = std::dynamic_pointer_cast(inputTensor); - void* data = !remoteTensor ? inputTensor->data() - : extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle); + auto userRemoteTensor = std::dynamic_pointer_cast(userTensor); + void* userBuffer = !userRemoteTensor + ? userTensor->data() + : extract_object(userRemoteTensor->get_properties(), ov::intel_npu::mem_handle); - const auto& copyInputTensor = _copyAllTensors.at(name); - auto copyRemoteTensor = std::dynamic_pointer_cast(copyInputTensor); - if (copyRemoteTensor == nullptr) { - void* copyData = copyInputTensor->data(); + const std::shared_ptr& levelZeroTensor = _levelZeroInputTensors.at(inputIndex); + auto levelZeroRemoteTensor = std::dynamic_pointer_cast(levelZeroTensor); + if (levelZeroRemoteTensor == nullptr) { + void* levelZeroBuffer = levelZeroTensor->data(); - if (data != copyData) { - if (data == nullptr || copyData == nullptr) { + if (userBuffer != levelZeroBuffer) { + if (userBuffer == nullptr || levelZeroBuffer == nullptr) { OPENVINO_THROW("Empty buffer"); } _logger.info("Tensor is not allocated in the current Level Zero context"); OV_ITT_TASK_NEXT(ZERO_INFER, "memcpy"); - std::memcpy(copyData, data, inputTensor->get_byte_size()); + std::memcpy(levelZeroBuffer, userBuffer, userTensor->get_byte_size()); } } + + ++inputIndex; } OV_ITT_TASK_NEXT(ZERO_INFER, "push"); - for (size_t i = 0; i < _batchSize; i++) { - _pipeline->push(i); - } + _pipeline->push(); } void ZeroInferRequest::get_result() { OV_ITT_TASK_CHAIN(ZERO_RESULT, itt::domains::LevelZeroBackend, "get_result", "pull"); _logger.debug("InferRequest::get_result start"); - - for (size_t i = 0; i < _batchSize; i++) { - _pipeline->pull(i); - } - - for (const auto& name : _outputAndStateOutputNames) { - const auto& outputTensor = _allTensors.at(name); - - if (isShapeTensorName(name)) { - const 
auto actualTensorName = name.substr(SHAPE_TENSOR_PREFIX.size()); - const auto& shapeNameMatch = _legacyNameToNodeName.find(actualTensorName); - if (shapeNameMatch != _legacyNameToNodeName.end()) { - ov::Shape actualDims; - actualDims.reserve(outputTensor->get_size()); - - for (size_t i = 0; i < outputTensor->get_size(); ++i) { - const auto reverseIdx = outputTensor->get_size() - 1 - i; - actualDims.push_back(outputTensor->data()[reverseIdx]); - } - auto& tensorToBeReshaped = _allTensors.at(shapeNameMatch->second); - tensorToBeReshaped->set_shape(actualDims); + _pipeline->pull(); + + size_t outputIndex = 0; + for (const std::shared_ptr& userTensor : _userOutputTensors) { + const IODescriptor outputDescriptor = _metadata.outputs.at(outputIndex); + if (outputDescriptor.isShapeTensor) { + OPENVINO_ASSERT(outputDescriptor.relatedDescriptorIndex.has_value(), + "The link between the dynamic tensor and its shape tensor is missing, entry name: ", + outputDescriptor.nameFromCompiler); + + ov::Shape actualDims; + actualDims.reserve(userTensor->get_size()); + + for (size_t i = 0; i < userTensor->get_size(); ++i) { + const auto reverseIdx = userTensor->get_size() - 1 - i; + actualDims.push_back(userTensor->data()[reverseIdx]); } + auto& tensorToBeReshaped = _userOutputTensors.at(*outputDescriptor.relatedDescriptorIndex); + tensorToBeReshaped->set_shape(actualDims); } - auto remoteTensor = std::dynamic_pointer_cast(outputTensor); - void* data = nullptr; - if (remoteTensor == nullptr) { - data = outputTensor->data(); - } else { - data = extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle); - } + auto userRemoteTensor = std::dynamic_pointer_cast(userTensor); + void* userBuffer = !userRemoteTensor + ? 
userTensor->data() + : extract_object(userRemoteTensor->get_properties(), ov::intel_npu::mem_handle); - const auto& copyOutputTensor = _copyAllTensors.at(name); - auto copyRemoteTensor = std::dynamic_pointer_cast(copyOutputTensor); - if (copyRemoteTensor == nullptr) { - void* copyData = copyOutputTensor->data(); + const std::shared_ptr& levelZeroTensor = _levelZeroOutputTensors.at(outputIndex); + auto levelZeroRemoteTensor = std::dynamic_pointer_cast(levelZeroTensor); + if (levelZeroRemoteTensor == nullptr) { + void* levelZeroBuffer = levelZeroTensor->data(); - if (data != copyData) { - if (data == nullptr || copyData == nullptr) { + if (userBuffer != levelZeroBuffer) { + if (userBuffer == nullptr || levelZeroBuffer == nullptr) { OPENVINO_THROW("Empty buffer"); } _logger.info("Tensor is not allocated in the current Level Zero context"); OV_ITT_TASK_NEXT(ZERO_RESULT, "memcpy"); - std::memcpy(data, copyData, outputTensor->get_byte_size()); + std::memcpy(userBuffer, levelZeroBuffer, userTensor->get_byte_size()); } } + + ++outputIndex; } OV_ITT_TASK_NEXT(ZERO_RESULT, "reset"); - for (size_t i = 0; i < _batchSize; i++) { - _pipeline->reset(i); - } + _pipeline->reset(); _logger.debug("InferRequest::get_result finished"); } diff --git a/src/plugins/intel_npu/src/backend/src/zero_init.cpp b/src/plugins/intel_npu/src/backend/src/zero_init.cpp index 8490220be6a407..7ed1c66c00911a 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_init.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_init.cpp @@ -29,8 +29,9 @@ static std::tuple queryDriverExtensionVersion( for (uint32_t i = 0; i < count; ++i) { auto& property = extProps[i]; - if (strncmp(property.name, ZE_GRAPH_EXT_NAME, strlen(ZE_GRAPH_EXT_NAME)) != 0) + if (strncmp(property.name, ZE_GRAPH_EXT_NAME, strlen(ZE_GRAPH_EXT_NAME)) != 0) { continue; + } // If the driver version is latest, will just use its name. 
if (property.version == ZE_GRAPH_EXT_VERSION_CURRENT) { diff --git a/src/plugins/intel_npu/src/backend/src/zero_memory.cpp b/src/plugins/intel_npu/src/backend/src/zero_memory.cpp index 6dea1396c853f0..79a5efab5ee419 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_memory.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_memory.cpp @@ -71,8 +71,8 @@ bool HostMemAllocator::is_equal(const HostMemAllocator& other) const { return (_initStructs == other._initStructs) && (_flag == other._flag); } -void MemoryManagementUnit::appendArgument(const std::string& name, const std::size_t argSize) { - _offsets.emplace(std::make_pair(name, _size)); +void MemoryManagementUnit::appendArgument(const std::size_t argSize) { + _offsets.push_back(_size); _size += argSize + alignment - (argSize % alignment); // is this really necessary? if 0==argSize%alignment -> add 1 * alignment @@ -94,16 +94,16 @@ const void* MemoryManagementUnit::getDeviceMemRegion() const { void* MemoryManagementUnit::getDeviceMemRegion() { return _device ? _device->data() : nullptr; } -void* MemoryManagementUnit::getDevicePtr(const std::string& name) { +void* MemoryManagementUnit::getDevicePtr(const size_t index) { uint8_t* from = static_cast(_device ? _device->data() : nullptr); - if (from == nullptr) { - OPENVINO_THROW("Device memory not allocated yet"); - } - if (!_offsets.count(name)) { - OPENVINO_THROW("Invalid memory offset key: ", name); - } + OPENVINO_ASSERT(from != nullptr, "Device memory not allocated yet"); + OPENVINO_ASSERT(index < _offsets.size(), + "Memory offset index out of bound. 
Received: ", + index, + ", memory offset size: ", + _offsets.size()); - return _offsets.at(name) + from; + return _offsets.at(index) + from; } } // namespace zeroMemory } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index f6def94baf39c4..77d325420ac088 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -25,7 +25,8 @@ struct DiscretePipeline final : public Pipeline { ze_graph_profiling_query_handle_t profiling_handle, const std::array, stage::COUNT>& command_queues, const uint32_t& group_ordinal, - std::unordered_map& tensors_data) + const std::vector>& inputTensorsData, + const std::vector>& outputTensorsData) : _config(config), _command_queues{command_queues}, _command_list{{{device_handle, context, graph_ddi_table_ext, _config, group_ordinal}, @@ -44,23 +45,24 @@ struct DiscretePipeline final : public Pipeline { static const std::size_t alignment = STANDARD_PAGE_SIZE; OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::DiscretePipeline::DiscretePipeline"); - for (const auto& desc : executor->inputs_desc_map()) { - _deviceInputs.appendArgument(desc.first, zeroUtils::getSizeIOBytes(desc.second.info)); + for (const auto& desc : executor->get_input_descriptors()) { + _deviceInputs.appendArgument(zeroUtils::getSizeIOBytes(desc.info)); } _deviceInputs.allocate(device_handle, context); _logger.debug("DiscretePipeline - appending memory copy and set argument value for input"); - for (const auto& desc : executor->inputs_desc_map()) { - const TensorData& inputTensorData = tensors_data.at(desc.first); - const void* tensorBuffer = reinterpret_cast(inputTensorData.mem); + size_t inputIndex = 0; + for (const auto& desc : executor->get_input_descriptors()) { + const void* tensorBuffer = reinterpret_cast(inputTensorsData.at(inputIndex)->mem); - const std::size_t argSize = 
zeroUtils::getSizeIOBytes(desc.second.info); + const std::size_t argSize = zeroUtils::getSizeIOBytes(desc.info); std::size_t size = argSize + alignment - (argSize % alignment); - _command_list[stage::UPLOAD].appendMemoryCopy(_deviceInputs.getDevicePtr(desc.first), tensorBuffer, size); + _command_list[stage::UPLOAD].appendMemoryCopy(_deviceInputs.getDevicePtr(inputIndex), tensorBuffer, size); - executor->setArgumentValue(desc.second.idx, _deviceInputs.getDevicePtr(desc.first)); + executor->setArgumentValue(desc.idx, _deviceInputs.getDevicePtr(inputIndex)); + ++inputIndex; } _logger.debug("DiscretePipeline - append signal event"); @@ -68,24 +70,26 @@ struct DiscretePipeline final : public Pipeline { _command_list[stage::UPLOAD].appendBarrier(); _event[stage::UPLOAD].AppendSignalEvent(_command_list[stage::UPLOAD]); - for (const auto& desc : executor->outputs_desc_map()) { - _deviceOutputs.appendArgument(desc.first, zeroUtils::getSizeIOBytes(desc.second.info)); + for (const auto& desc : executor->get_output_descriptors()) { + _deviceOutputs.appendArgument(zeroUtils::getSizeIOBytes(desc.info)); } _deviceOutputs.allocate(device_handle, context); _logger.debug("DiscretePipeline - appending memory copy and set argument value for output"); - for (const auto& desc : executor->outputs_desc_map()) { - const TensorData& outputTensorData = tensors_data.at(desc.first); - void* tensorBuffer = reinterpret_cast(outputTensorData.mem); - const std::size_t argSize = zeroUtils::getSizeIOBytes(desc.second.info); + size_t outputIndex = 0; + for (const auto& desc : executor->get_output_descriptors()) { + void* tensorBuffer = reinterpret_cast(outputTensorsData.at(outputIndex)->mem); + + const std::size_t argSize = zeroUtils::getSizeIOBytes(desc.info); std::size_t size = argSize + alignment - (argSize % alignment); _command_list[stage::READBACK].appendMemoryCopy(tensorBuffer, - _deviceOutputs.getDevicePtr(desc.first), + _deviceOutputs.getDevicePtr(outputIndex), size); - 
executor->setArgumentValue(desc.second.idx, _deviceOutputs.getDevicePtr(desc.first)); + executor->setArgumentValue(desc.idx, _deviceOutputs.getDevicePtr(outputIndex)); + ++outputIndex; } _event[stage::UPLOAD].AppendWaitOnEvent(_command_list[stage::EXECUTE]); @@ -104,7 +108,7 @@ struct DiscretePipeline final : public Pipeline { DiscretePipeline& operator=(const DiscretePipeline&) = delete; virtual ~DiscretePipeline() = default; - void push(size_t) override { + void push() override { _logger.debug("DiscretePipeline - push() started"); OV_ITT_TASK_CHAIN(ZERO_INFER_REQUEST_DP_PUSH, itt::domains::LevelZeroBackend, @@ -119,7 +123,7 @@ struct DiscretePipeline final : public Pipeline { _logger.debug("DiscretePipeline - push() completed"); }; - void pull(size_t) override { + void pull() override { _logger.debug("DiscretePipeline - pull() started"); OV_ITT_TASK_CHAIN(ZERO_INFER_REQUEST_DP_PULL, itt::domains::LevelZeroBackend, @@ -136,14 +140,14 @@ struct DiscretePipeline final : public Pipeline { _logger.debug("DiscretePipeline - pull() completed"); }; - void reset(size_t) const override { + void reset() const override { // Reset the fence objects for (auto& fence : _fence) { fence.reset(); } }; - void updateCommandList(const TensorData&, uint32_t, size_t) override {} + void updateCommandList(const TensorData&, const uint32_t) override{}; private: const Config _config; @@ -166,23 +170,27 @@ struct IntegratedPipeline final : public Pipeline { std::shared_ptr npu_profiling, CommandQueue& command_queue, const uint32_t& group_ordinal, - std::unordered_map& tensors_data, - const size_t batch_size) + const std::vector>& inputTensorsData, + const std::vector>& outputTensorsData, + const size_t numberOfCommandLists) : _config(config), _executor(static_cast(executorPtr.get())), _command_queue{command_queue}, - _event_pool{device_handle, context, batch_size ? static_cast(batch_size) : 1, _config}, + _event_pool{device_handle, + context, + numberOfCommandLists ? 
static_cast(numberOfCommandLists) : 1, + _config}, _npu_profiling(std::move(npu_profiling)), _logger("IntegratedPipeline", _config.get()) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::IntegratedPipeline::IntegratedPipeline"); _logger.debug("IntegratedPipeline - initialize started"); - _command_lists.reserve(batch_size); - _events.reserve(batch_size); - _fences.reserve(batch_size); + _command_lists.reserve(numberOfCommandLists); + _events.reserve(numberOfCommandLists); + _fences.reserve(numberOfCommandLists); _logger.debug("IntegratedPipeline - emplace_back _event_pool and _command_queue"); - for (size_t i = 0; i < batch_size; i++) { + for (size_t i = 0; i < numberOfCommandLists; i++) { _command_lists.emplace_back(std::make_unique( device_handle, context, @@ -194,19 +202,21 @@ struct IntegratedPipeline final : public Pipeline { _fences.emplace_back(std::make_unique(_command_queue, _config)); } - for (size_t i = 0; i < batch_size; i++) { - for (const auto& desc : _executor->inputs_desc_map()) { - const TensorData& inputTensorData = tensors_data.at(desc.first); - _executor->setArgumentValue( - desc.second.idx, - static_cast(inputTensorData.mem) + (i * inputTensorData.size) / batch_size); + for (size_t i = 0; i < numberOfCommandLists; i++) { + size_t ioIndex = 0; + for (const auto& desc : _executor->get_input_descriptors()) { + _executor->setArgumentValue(desc.idx, + static_cast(inputTensorsData.at(ioIndex)->mem) + + (i * inputTensorsData.at(ioIndex)->size) / numberOfCommandLists); + ++ioIndex; } - for (const auto& desc : _executor->outputs_desc_map()) { - const TensorData& outputTensorData = tensors_data.at(desc.first); - _executor->setArgumentValue( - desc.second.idx, - static_cast(outputTensorData.mem) + (i * outputTensorData.size) / batch_size); + ioIndex = 0; + for (const auto& desc : _executor->get_output_descriptors()) { + _executor->setArgumentValue(desc.idx, + static_cast(outputTensorsData.at(ioIndex)->mem) + + (i * 
outputTensorsData.at(ioIndex)->size) / numberOfCommandLists); + ++ioIndex; } /// append timestamp command if feature was activated @@ -238,51 +248,65 @@ struct IntegratedPipeline final : public Pipeline { IntegratedPipeline& operator=(const IntegratedPipeline&) = delete; virtual ~IntegratedPipeline() = default; - void push(size_t batch_index) override { + void push() override { _logger.debug("IntegratedPipeline - push() started"); - OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PUSH, itt::domains::LevelZeroBackend, "IntegratedPipeline", "push"); - if (sync_output_with_fences_) { - _command_queue.executeCommandList(*_command_lists.at(batch_index), *_fences.at(batch_index)); - } else { - _command_queue.executeCommandList(*_command_lists.at(batch_index)); + + for (size_t i = 0; i < _command_lists.size(); ++i) { + OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PUSH, itt::domains::LevelZeroBackend, "IntegratedPipeline", "push"); + if (sync_output_with_fences_) { + _command_queue.executeCommandList(*_command_lists.at(i), *_fences.at(i)); + } else { + _command_queue.executeCommandList(*_command_lists.at(i)); + } } + _logger.debug("IntegratedPipeline - push() completed"); }; - void pull(size_t batch_index) override { + void pull() override { _logger.debug("IntegratedPipeline - pull() started"); OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PULL, itt::domains::LevelZeroBackend, "IntegratedPipeline", "pull"); - if (sync_output_with_fences_) { - _fences.at(batch_index)->hostSynchronize(); - } else { - _events.at(batch_index)->hostSynchronize(); - } - /// sample npu timestamps if feature was activated - if (_npu_profiling != nullptr) { - _npu_profiling->sampleNpuTimestamps(); + + for (size_t i = 0; i < _command_lists.size(); ++i) { + if (sync_output_with_fences_) { + _fences.at(i)->hostSynchronize(); + } else { + _events.at(i)->hostSynchronize(); + } + /// sample npu timestamps if feature was activated + if (_npu_profiling != nullptr) { + _npu_profiling->sampleNpuTimestamps(); + } } + 
_logger.debug("IntegratedPipeline - pull() completed"); }; - void reset(size_t batch_index) const override { + void reset() const override { _logger.debug("IntegratedPipeline - rest() started"); - if (sync_output_with_fences_) { - _fences.at(batch_index)->reset(); - } else { - _events.at(batch_index)->reset(); + + for (size_t i = 0; i < _command_lists.size(); ++i) { + if (sync_output_with_fences_) { + _fences.at(i)->reset(); + } else { + _events.at(i)->reset(); + } } + _logger.debug("IntegratedPipeline - rest() completed"); }; - void updateCommandList(const TensorData& tensors_data, uint32_t index, size_t batch_size) override { + void updateCommandList(const TensorData& tensorsData, const uint32_t index) override { OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_UMCL, itt::domains::LevelZeroBackend, "IntegratedPipeline", "updateCommandList"); - for (size_t i = 0; i < batch_size; i++) { + const size_t numberOfCommandLists = _command_lists.size(); + + for (size_t i = 0; i < numberOfCommandLists; i++) { _command_lists.at(i)->updateMutableCommandList( index, - static_cast(tensors_data.mem) + (i * tensors_data.size) / batch_size); + static_cast(tensorsData.mem) + (i * tensorsData.size) / numberOfCommandLists); _command_lists.at(i)->close(); } }; @@ -305,8 +329,9 @@ std::unique_ptr makePipeline(const std::shared_ptr& e zeroProfiling::ProfilingPool& profiling_pool, zeroProfiling::ProfilingQuery& profiling_query, std::shared_ptr npu_profiling, - std::unordered_map& tensors_data, - const size_t batch_size) { + const std::vector>& inputTensorsData, + const std::vector>& outputTensorsData, + const size_t numberOfCommandLists) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Infer_request::makePipeline"); if (profiling_pool.create()) profiling_query.create(profiling_pool._handle); @@ -333,8 +358,9 @@ std::unique_ptr makePipeline(const std::shared_ptr& e npu_profiling, *command_queues[stage::EXECUTE], group_ordinal, - tensors_data, - batch_size); + inputTensorsData, + 
outputTensorsData, + numberOfCommandLists); } return std::make_unique(config, @@ -345,7 +371,8 @@ std::unique_ptr makePipeline(const std::shared_ptr& e profiling_query.getHandle(), command_queues, group_ordinal, - tensors_data); + inputTensorsData, + outputTensorsData); } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp index ffe022a1800ef5..18042250e46386 100644 --- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp +++ b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp @@ -35,8 +35,10 @@ using SerializedIR = std::pair>; (std::is_same::value || std::is_same::value || \ std::is_same::value) -// For ext version >= 1.6, originalShape is avaible -#define NotSupportOriginalShape(T) \ +// A bug inside the driver makes the "pfnGraphGetArgumentMetadata" call not safe for use prior to +// "ze_graph_dditable_ext_1_6_t". +// See: E#117498 +#define NotSupportArgumentMetadata(T) \ (std::is_same::value || std::is_same::value || \ std::is_same::value || std::is_same::value) @@ -79,16 +81,18 @@ class LevelZeroCompilerInDriver final : public ICompiler { /** * @brief Serialize input / output information to string format. * @details Format: - * --inputs_precisions=": [:]" - * --inputs_layouts=": [:]" - * --outputs_precisions=":" - * --outputs_layouts=":" + * --inputs_precisions="0: [1:]" + * --inputs_layouts="0: [1:]" + * --outputs_precisions="0:" + * --outputs_layouts="0:" + * + * For older compiler versions, the name of the inputs/outputs may be used instead of their indices. * * Since the layout information is no longer an important part of the metadata values when using the 2.0 OV * API, the layout fields shall be filled with default values in order to assure the backward compatibility * with the driver. 
*/ - static std::string serializeIOInfo(const std::shared_ptr& model); + static std::string serializeIOInfo(const std::shared_ptr& model, const bool useIndices); private: NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const; @@ -97,53 +101,19 @@ class LevelZeroCompilerInDriver final : public ICompiler { ze_graph_compiler_version_info_t compilerVersion) const; std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t& compilerVersion) const; - /** - * @brief Extracts the layout value or the state descriptor from the given Level Zero structure. - * @details Extracting the layout information is required only when using older driver versions which rely on - * this legacy attribute. Since this information is not found within the parameter/result nodes, we need to - * extract this value here. - * - * The state variables are also not found in the previously mentioned nodes, thus if the given Level Zero - * parameter corresponds to an input/output, we shall extract the layout value from it. Else it represents a - * state variable and the descriptor will be extracted and stored in an OpenVINO specific format. - * @param parameters Holds the already extracted input node descriptors. The transposed shape attribute of the - * corresponding entry may be updated according to the extracted layout value. - * @param results Holds the already extracted output node descriptors. The transposed shape attribute of the - * corresponding entry may be updated according to the extracted layout value. - * @param states The state descriptors shall be stored here in an OpenVINO specific format. - * @param stateNames The output location of the state variables' names in the order found within the compiled - * model. - * @param arg The Level Zero specific structure from which the layout value or state variable descriptor shall - * be extracted. 
- */ - template - void getLayoutOrStateDescriptor(IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states, - std::vector& stateNames, - const T& arg) const; - - template = true> + template = true> void getMetadata(TableExtension* graphDdiTableExt, ze_graph_handle_t graphHandle, uint32_t index, - std::vector& inputNames, - std::vector& outputNames, - std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& state) const; - - template = true> + std::vector& inputs, + std::vector& outputs) const; + + template = true> void getMetadata(TableExtension* graphDdiTableExt, ze_graph_handle_t graphHandle, uint32_t index, - std::vector& inputNames, - std::vector& outputNames, - std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& state) const; + std::vector& inputs, + std::vector& outputs) const; template = true> ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp index e9fee3d9ee2f2a..6543b1199b7a4b 100644 --- a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp @@ -100,8 +100,9 @@ LevelZeroCompilerAdapter::LevelZeroCompilerAdapter() : _logger("LevelZeroCompile for (uint32_t i = 0; i < count; ++i) { auto& property = extProps[i]; - if (strncmp(property.name, ZE_GRAPH_EXT_NAME, strlen(ZE_GRAPH_EXT_NAME)) != 0) + if (strncmp(property.name, ZE_GRAPH_EXT_NAME, strlen(ZE_GRAPH_EXT_NAME)) != 0) { continue; + } // If the driver version is latest, will just use its name. 
if (property.version == ZE_GRAPH_EXT_VERSION_CURRENT) { diff --git a/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp b/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp index cc9655a38dd3ff..7259673191441a 100644 --- a/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp +++ b/src/plugins/intel_npu/src/compiler/src/graph_transformations.cpp @@ -47,7 +47,12 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weigh // precision/layout preprocessing requirement. We are setting this value to "true" since the API version is no // longer a cause for altering the metadata. This is due to the preprocessing performed in the OpenVINO framework's // implementaion, the "ov::Model" object is preprocessed before reaching the NPU plugin. - const auto new_api_key = "is_new_api"; + const auto newAPIKey = "is_new_api"; + + // Flag used for indicating an NPU plugin version which switched the I/O identification convention from names to + // indices. The flag is required in order to inform the driver-compiler adapter to expect indices when attempting to + // deserialize the I/O metadata. 
+ const auto useIndicesForIOMetadata = "use_indices_for_io_metadata"; // We modify the original model object here therefore a mutex is required static std::mutex rtInfoMutex; @@ -55,12 +60,14 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weigh { std::lock_guard lock(rtInfoMutex); - _model->set_rt_info(true, new_api_key); + _model->set_rt_info(true, newAPIKey); + _model->set_rt_info(true, useIndicesForIOMetadata); manager.run_passes(_model); auto& rtInfo = _model->get_rt_info(); - rtInfo.erase(new_api_key); + rtInfo.erase(newAPIKey); + rtInfo.erase(useIndicesForIOMetadata); } _logger.debug("serializeModelToStream end"); } diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp index 5f41e0dcd8a6aa..efe8d2e594f5b7 100644 --- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp +++ b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp @@ -174,62 +174,6 @@ std::string rankToLegacyLayoutString(const size_t rank) { } } -size_t zeLayoutToRank(const ze_graph_argument_layout_t layout) { - switch (layout) { - case ZE_GRAPH_ARGUMENT_LAYOUT_C: - return 1; - case ZE_GRAPH_ARGUMENT_LAYOUT_CN: - return 2; - case ZE_GRAPH_ARGUMENT_LAYOUT_HW: - return 2; - case ZE_GRAPH_ARGUMENT_LAYOUT_NC: - return 2; - case ZE_GRAPH_ARGUMENT_LAYOUT_CHW: - return 3; - case ZE_GRAPH_ARGUMENT_LAYOUT_NCHW: - return 4; - case ZE_GRAPH_ARGUMENT_LAYOUT_NHWC: - return 4; - case ZE_GRAPH_ARGUMENT_LAYOUT_NCDHW: - return 5; - case ZE_GRAPH_ARGUMENT_LAYOUT_NDHWC: - return 5; - default: - // TODO #-30200 Extend to support all cases - return 0; - } -} - -/** - * @brief Transposes the original shape value according to given layout. 
- */ -std::vector reshapeByLayout(const std::vector& originalDimensions, - const ze_graph_argument_layout_t layout) { - std::vector order; - std::vector reshapedDimensions; - - switch (layout) { - case ZE_GRAPH_ARGUMENT_LAYOUT_CN: - order = NC_TO_CN_LAYOUT_DIMENSIONS_ORDER; - break; - case ZE_GRAPH_ARGUMENT_LAYOUT_NHWC: - order = NCHW_TO_NHWC_LAYOUT_DIMENSIONS_ORDER; - break; - case ZE_GRAPH_ARGUMENT_LAYOUT_NDHWC: - order = NCDHW_TO_NDHWC_LAYOUT_DIMENSIONS_ORDER; - break; - default: - // TODO #-30200 Extend to support all cases - return originalDimensions; - } - - for (const size_t& orderElement : order) { - reshapedDimensions.push_back(originalDimensions[orderElement]); - } - - return reshapedDimensions; -} - } // namespace namespace intel_npu { @@ -311,7 +255,8 @@ SerializedIR LevelZeroCompilerInDriver::serializeIR( } template -std::string LevelZeroCompilerInDriver::serializeIOInfo(const std::shared_ptr& model) { +std::string LevelZeroCompilerInDriver::serializeIOInfo(const std::shared_ptr& model, + const bool useIndices) { const ov::ParameterVector& parameters = model->get_parameters(); const ov::ResultVector& results = model->get_results(); @@ -324,21 +269,32 @@ std::string LevelZeroCompilerInDriver::serializeIOInfo(const std inputsLayoutSS << INPUTS_LAYOUTS_KEY << KEY_VALUE_SEPARATOR << VALUE_DELIMITER; if (!parameters.empty()) { - const std::string& firstInputName = parameters.at(0)->get_friendly_name(); + size_t parameterIndex = 0; for (const std::shared_ptr& parameter : parameters) { - const std::string& name = parameter->get_friendly_name(); const ov::element::Type& precision = parameter->get_element_type(); const size_t rank = parameter->get_shape().size(); - if (name != firstInputName) { + if (parameterIndex != 0) { inputsPrecisionSS << VALUES_SEPARATOR; inputsLayoutSS << VALUES_SEPARATOR; } - inputsPrecisionSS << name << NAME_VALUE_SEPARATOR << ovPrecisionToLegacyPrecisionString(precision); - // Ticket: E-88902 - inputsLayoutSS << name << 
NAME_VALUE_SEPARATOR << rankToLegacyLayoutString(rank); + if (useIndices) { + inputsPrecisionSS << parameterIndex; + inputsLayoutSS << parameterIndex; + } else { + const std::string& name = parameter->get_friendly_name(); + + inputsPrecisionSS << name; + // Ticket: E-88902 + inputsLayoutSS << name; + } + + inputsPrecisionSS << NAME_VALUE_SEPARATOR << ovPrecisionToLegacyPrecisionString(precision); + inputsLayoutSS << NAME_VALUE_SEPARATOR << rankToLegacyLayoutString(rank); + + ++parameterIndex; } } @@ -348,20 +304,31 @@ std::string LevelZeroCompilerInDriver::serializeIOInfo(const std outputsPrecisionSS << OUTPUTS_PRECISIONS_KEY << KEY_VALUE_SEPARATOR << VALUE_DELIMITER; outputsLayoutSS << OUTPUTS_LAYOUTS_KEY << KEY_VALUE_SEPARATOR << VALUE_DELIMITER; - const std::string& firstOutputName = results.at(0)->get_input_node_ptr(0)->get_friendly_name(); + size_t resultIndex = 0; for (const std::shared_ptr& result : results) { - const std::string& name = result->get_input_node_ptr(0)->get_friendly_name(); const ov::element::Type_t precision = result->get_element_type(); const size_t rank = result->get_shape().size(); - if (name != firstOutputName) { + if (resultIndex != 0) { outputsPrecisionSS << VALUES_SEPARATOR; outputsLayoutSS << VALUES_SEPARATOR; } - outputsPrecisionSS << name << NAME_VALUE_SEPARATOR << ovPrecisionToLegacyPrecisionString(precision); - outputsLayoutSS << name << NAME_VALUE_SEPARATOR << rankToLegacyLayoutString(rank); + if (useIndices) { + outputsPrecisionSS << resultIndex; + outputsLayoutSS << resultIndex; + } else { + const std::string& name = result->get_input_node_ptr(0)->get_friendly_name(); + + outputsPrecisionSS << name; + outputsLayoutSS << name; + } + + outputsPrecisionSS << NAME_VALUE_SEPARATOR << ovPrecisionToLegacyPrecisionString(precision); + outputsLayoutSS << NAME_VALUE_SEPARATOR << rankToLegacyLayoutString(rank); + + ++resultIndex; } outputsPrecisionSS << VALUE_DELIMITER; @@ -808,8 +775,9 @@ ze_result_t 
LevelZeroCompilerInDriver::seriazlideIRModelAndCreat ze_graph_format_t format = ZE_GRAPH_FORMAT_NGRAPH_LITE; std::string buildFlags; + const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); - buildFlags += serializeIOInfo(model); + buildFlags += serializeIOInfo(model, useIndices); buildFlags += " "; buildFlags += serializeConfig(config, const_cast(compilerVersion)); @@ -977,112 +945,68 @@ uint32_t LevelZeroCompilerInDriver::getSupportedOpsetVersion() c return maxOpsetVersion; } -template -template -void LevelZeroCompilerInDriver::getLayoutOrStateDescriptor(IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states, - std::vector& stateNames, - const T& arg) const { - std::string legacyName = arg.name; - - // The layout may differ from the default one only when using significantly older drivers. In order to accommodate - // this case, an extra attribute needs to be stored which holds the transposed shape. 
- const std::vector originalDimensions(arg.dims, arg.dims + zeLayoutToRank(arg.deviceLayout)); - const std::vector reshapedDimensions = reshapeByLayout(originalDimensions, arg.deviceLayout); - const ov::Shape shape = ov::Shape(reshapedDimensions); - - if (!isStateInputName(legacyName) && !isStateOutputName(legacyName)) { - if (arg.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT) { - _logger.info("getLayoutOrStateDescriptor Found input \"%s\"", legacyName.c_str()); - - parameters[legacyName].transposedShape = shape; - } - if (arg.type == ZE_GRAPH_ARGUMENT_TYPE_OUTPUT) { - _logger.info("getLayoutOrStateDescriptor Found output \"%s\"", legacyName.c_str()); - - results[legacyName].transposedShape = shape; - } - } else if (isStateInputName(legacyName)) { - // The inputs and outputs of the state nodes share the same metadata, thus we'll consider only the the inputs - // here - legacyName = legacyName.substr(READVALUE_PREFIX.length()); - _logger.info("getLayoutOrStateDescriptor Found state variable \"%s\"", legacyName.c_str()); - - const ov::element::Type_t precision = toOVElementType(arg.devicePrecision); - - stateNames.push_back(legacyName); - states[legacyName] = {legacyName, "", {}, precision, shape, shape}; - } -} - /** - * @brief Extracts the parameter/result (i.e. input/output) descriptors from Level Zero specific structures into - * OpenVINO specific ones. - * @param nodeDescriptors The map in which the result shall be stored. - * @param names The I/O identifiers shall be stored here in the order found within the compiled model. - * @param metadata The Level Zero structure fomr which the descriptors will be extracted. + * @brief Extracts the I/O metadata from Level Zero specific structures and converts them into OpenVINO specific ones. + * + * @param arg The main Level Zero structure from which most metadata will be extracted. + * @param metadata The secondary Level Zero structure from which metadata will be extracted. 
More specifically, the + * argument is used for populating "shapeFromIRModel". Not providing this argument will lead to an empty value for the + * referenced attribute. + * @returns A descriptor object containing the metadata converted into OpenVINO specific structures. */ -static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors, - std::vector& names, - ze_graph_argument_properties_3_t& arg) { +static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg, + const std::optional& metadata) { ov::element::Type_t precision = toOVElementType(arg.devicePrecision); - ov::Shape shape; + ov::Shape shapeFromCompiler, shapeFromIRModel; std::unordered_set outputTensorNames; for (uint32_t id = 0; id < arg.associated_tensor_names_count; id++) { outputTensorNames.insert(arg.associated_tensor_names[id]); } - for (uint32_t id = 0; id < arg.dims_count; id++) { - shape.push_back(arg.dims[id]); + shapeFromCompiler.push_back(arg.dims[id]); } - - const std::string& legacyName = arg.name; - - names.push_back(arg.debug_friendly_name); - nodeDescriptors[arg.debug_friendly_name] = - {legacyName, arg.debug_friendly_name, std::move(outputTensorNames), precision, shape, shape}; -} - -static void getNodeDescriptor(IONodeDescriptorMap& nodeDescriptors, - std::vector& names, - ze_graph_argument_properties_3_t& arg, - ze_graph_argument_metadata_t& metadata) { - ov::element::Type_t precision = toOVElementType(arg.devicePrecision); - ov::Shape transposedShape, originalShape; - std::unordered_set outputTensorNames; - - for (uint32_t id = 0; id < arg.associated_tensor_names_count; id++) { - outputTensorNames.insert(arg.associated_tensor_names[id]); - } - - for (uint32_t id = 0; id < arg.dims_count; id++) { - transposedShape.push_back(arg.dims[id]); + if (metadata.has_value()) { + for (uint32_t id = 0; id < metadata->shape_size; id++) { + shapeFromIRModel.push_back(metadata->shape[id]); + } } - for (uint32_t id = 0; id < metadata.shape_size; id++) { - 
originalShape.push_back(metadata.shape[id]); + // Flags will be used instead of indices for informing the type of the current entry + std::string nameFromCompiler = arg.name; + bool isStateInput = false; + bool isStateOutput = false; + bool isShapeTensor = false; + if (isStateInputName(nameFromCompiler)) { + nameFromCompiler = nameFromCompiler.substr(READVALUE_PREFIX.length()); + isStateInput = true; + } else if (isStateOutputName(nameFromCompiler)) { + nameFromCompiler = nameFromCompiler.substr(ASSIGN_PREFIX.length()); + isStateOutput = true; + } else if (isShapeTensorName(nameFromCompiler)) { + nameFromCompiler = nameFromCompiler.substr(SHAPE_TENSOR_PREFIX.length()); + isShapeTensor = true; } - const std::string& legacyName = arg.name; - - names.push_back(arg.debug_friendly_name); - nodeDescriptors[arg.debug_friendly_name] = - {legacyName, arg.debug_friendly_name, std::move(outputTensorNames), precision, originalShape, transposedShape}; + return {nameFromCompiler, + precision, + std::move(shapeFromCompiler), + isStateInput, + isStateOutput, + isShapeTensor, + std::nullopt, + arg.debug_friendly_name, + std::move(outputTensorNames), + metadata.has_value() ? 
std::optional(shapeFromIRModel) : std::nullopt}; } template -template > +template > void LevelZeroCompilerInDriver::getMetadata(TableExtension* graphDdiTableExt, ze_graph_handle_t graphHandle, uint32_t index, - std::vector& inputNames, - std::vector& outputNames, - std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states) const { + std::vector& inputs, + std::vector& outputs) const { ze_graph_argument_properties_3_t arg; auto result = graphDdiTableExt->pfnGetArgumentProperties3(graphHandle, index, &arg); if (ZE_RESULT_SUCCESS != result) { @@ -1094,30 +1018,26 @@ void LevelZeroCompilerInDriver::getMetadata(TableExtension* grap uint64_t(result)); } - if (!isStateInputName(arg.name) && !isStateOutputName(arg.name)) { - if (ZE_GRAPH_ARGUMENT_TYPE_INPUT == arg.type) { - getNodeDescriptor(parameters, inputNames, arg); - } - - if (ZE_GRAPH_ARGUMENT_TYPE_OUTPUT == arg.type) { - getNodeDescriptor(results, outputNames, arg); - } + switch (arg.type) { + case ZE_GRAPH_ARGUMENT_TYPE_INPUT: { + inputs.push_back(getIODescriptor(arg, std::nullopt)); + } break; + case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: { + outputs.push_back(getIODescriptor(arg, std::nullopt)); + } break; + default: { + OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ", arg.type); + } } - - getLayoutOrStateDescriptor(parameters, results, states, stateNames, arg); } template -template > +template > void LevelZeroCompilerInDriver::getMetadata(TableExtension* graphDdiTableExt, ze_graph_handle_t graphHandle, uint32_t index, - std::vector& inputNames, - std::vector& outputNames, - std::vector& stateNames, - IONodeDescriptorMap& parameters, - IONodeDescriptorMap& results, - IONodeDescriptorMap& states) const { + std::vector& inputs, + std::vector& outputs) const { ze_graph_argument_properties_3_t arg; auto result = graphDdiTableExt->pfnGetArgumentProperties3(graphHandle, index, &arg); if (ZE_RESULT_SUCCESS != 
result) { @@ -1129,7 +1049,9 @@ void LevelZeroCompilerInDriver::getMetadata(TableExtension* grap uint64_t(result)); } - if (!isStateInputName(arg.name) && !isStateOutputName(arg.name)) { + std::optional optionalMetadata = std::nullopt; + + if (!isStateInputName(arg.name) && !isStateOutputName(arg.name) && !isShapeTensorName(arg.name)) { ze_graph_argument_metadata_t metadata; result = graphDdiTableExt->pfnGraphGetArgumentMetadata(graphHandle, index, &metadata); if (ZE_RESULT_SUCCESS != result) { @@ -1141,16 +1063,20 @@ void LevelZeroCompilerInDriver::getMetadata(TableExtension* grap uint64_t(result)); } - if (ZE_GRAPH_ARGUMENT_TYPE_INPUT == arg.type) { - getNodeDescriptor(parameters, inputNames, arg, metadata); - } - - if (ZE_GRAPH_ARGUMENT_TYPE_OUTPUT == arg.type) { - getNodeDescriptor(results, outputNames, arg, metadata); - } + optionalMetadata = std::optional(metadata); } - getLayoutOrStateDescriptor(parameters, results, states, stateNames, arg); + switch (arg.type) { + case ZE_GRAPH_ARGUMENT_TYPE_INPUT: { + inputs.push_back(getIODescriptor(arg, optionalMetadata)); + } break; + case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT: { + outputs.push_back(getIODescriptor(arg, optionalMetadata)); + } break; + default: { + OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ", arg.type); + } + } } template @@ -1171,18 +1097,12 @@ NetworkMetadata LevelZeroCompilerInDriver::getNetworkMeta(ze_gra NetworkMetadata meta; for (uint32_t index = 0; index < graphProperties.numGraphArgs; ++index) { - getMetadata(_graphDdiTableExt, - graphHandle, - index, - meta.inputNames, - meta.outputNames, - meta.stateNames, - meta.parameters, - meta.results, - meta.states); + getMetadata(_graphDdiTableExt, graphHandle, index, meta.inputs, meta.outputs); } // TODO: support this information in CiD [track: E#33479] meta.numStreams = 1; + meta.bindRelatedDescriptors(); + return meta; } diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp 
b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index f5d7153974ccd3..1155f313a3cd60 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -37,62 +37,52 @@ const char* NPU_PLUGIN_LIB_NAME = "openvino_intel_npu_plugin"; * Note that a stored compiled model does not hold the original IR model within it. The only related information * which may be extracted is the original model's "parameter"/"result" nodes. Thus, we need to build a dummy model * starting from these fields in order to satisfy the API. - * @param parameterDescriptors Describes the input nodes. - * @param resultDescriptors Describes the output nodes. - * @param inputNames The names of the inputs registered in the order given by the model. - * @param outputNames The names of the outputs registered in the order given by the model. - * @param isBatchingSupported Newer driver versions support batching mode on the plugin. + * + * @param inputDescriptors Describes the input nodes. + * @param outputDescriptors Describes the output nodes. + * @returns The dummy "ov::Model" composed of "parameter" and "result" nodes built using the given descriptors. 
*/ -std::shared_ptr create_dummy_model(const IONodeDescriptorMap& parameterDescriptors, - const IONodeDescriptorMap& resultDescriptors, - const std::vector& inputNames, - const std::vector& outputNames, - bool isBatchingSupported) { +std::shared_ptr create_dummy_model(const std::vector& inputDescriptors, + const std::vector& outputDescriptors) { ov::ParameterVector parameters; ov::NodeVector results; - for (const std::string& inputName : inputNames) { - const IONodeDescriptor& parameterDescriptor = parameterDescriptors.at(inputName); + for (const IODescriptor& inputDescriptor : inputDescriptors) { + if (inputDescriptor.isStateInput || inputDescriptor.isStateOutput || inputDescriptor.isShapeTensor) { + continue; + } - std::shared_ptr parameter = [&] { - if (isBatchingSupported) { - return std::make_shared(parameterDescriptor.precision, - parameterDescriptor.originalShape); - } - return std::make_shared(parameterDescriptor.precision, - parameterDescriptor.transposedShape); - }(); + std::shared_ptr parameter = std::make_shared( + inputDescriptor.precision, + inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel + : inputDescriptor.shapeFromCompiler); - parameter->set_friendly_name(parameterDescriptor.currentNodeName); - parameter->output(0).get_tensor().set_names(parameterDescriptor.outputTensorNames); + parameter->set_friendly_name(inputDescriptor.nodeFriendlyName); + parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames); parameters.push_back(parameter); } - // The "result" nodes require a parent node in order to satisfy the legacy API naming conventions as well (in - // the 1.0 API, the name of an output is given by the parent of the "result" node). Additionally, a dummy shape for + // The "result" nodes require a parent node in order to satisfy the API conventions. 
Additionally, a dummy shape for // the "Constant" node was required since the specific constructor does not accept "ov::PartialShape" values (a // constant can't have dynamic shape). The dummy tensor was also brought in order to register the correct, // potentially dynamic, output shape. - for (const std::string& outputName : outputNames) { - const IONodeDescriptor& resultDescriptor = resultDescriptors.at(outputName); + for (const IODescriptor& outputDescriptor : outputDescriptors) { + if (outputDescriptor.isStateInput || outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor) { + continue; + } + std::shared_ptr constantDummy = - std::make_shared(resultDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE); - constantDummy->set_friendly_name(resultDescriptor.legacyName); - - const std::shared_ptr& tensorDummy = [&] { - if (isBatchingSupported) { - return std::make_shared(resultDescriptor.precision, - resultDescriptor.originalShape, - resultDescriptor.outputTensorNames); - } - return std::make_shared(resultDescriptor.precision, - resultDescriptor.transposedShape, - resultDescriptor.outputTensorNames); - }(); + std::make_shared(outputDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE); + + const std::shared_ptr& tensorDummy = std::make_shared( + outputDescriptor.precision, + outputDescriptor.shapeFromIRModel.has_value() ? 
*outputDescriptor.shapeFromIRModel + : outputDescriptor.shapeFromCompiler, + outputDescriptor.outputTensorNames); std::shared_ptr result = std::make_shared(constantDummy); result->output(0).set_tensor_ptr(tensorDummy); - result->set_friendly_name(resultDescriptor.currentNodeName); + result->set_friendly_name(outputDescriptor.nodeFriendlyName); results.push_back(result); } @@ -756,11 +746,7 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c auto meta = compiler->parse(blob, localConfig); meta.name = "net" + std::to_string(_compiledModelLoadCounter++); - const std::shared_ptr modelDummy = create_dummy_model(meta.parameters, - meta.results, - meta.inputNames, - meta.outputNames, - _backends->isBatchingSupported()); + const std::shared_ptr modelDummy = create_dummy_model(meta.inputs, meta.outputs); bool profiling = localConfig.get(); diff --git a/src/plugins/intel_npu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp b/src/plugins/intel_npu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp new file mode 100644 index 00000000000000..f029388ab9bb02 --- /dev/null +++ b/src/plugins/intel_npu/tests/functional/shared_tests_instances/execution_graph_tests/duplicate_inputs_outputs_names.cpp @@ -0,0 +1,20 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "execution_graph_tests/duplicate_inputs_outputs_names.hpp" + +#include "common/npu_test_env_cfg.hpp" +#include "common/utils.hpp" +#include "common_test_utils/test_constants.hpp" + +using namespace ExecutionGraphTests; + +namespace { + +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, + ExecGraphDuplicateInputsOutputsNames, + ::testing::Values(ov::test::utils::DEVICE_NPU), + ov::test::utils::appendPlatformTypeTestName); + +} // namespace diff --git a/src/tests/functional/plugin/shared/include/execution_graph_tests/duplicate_inputs_outputs_names.hpp 
b/src/tests/functional/plugin/shared/include/execution_graph_tests/duplicate_inputs_outputs_names.hpp new file mode 100644 index 00000000000000..b6a7f3fcab038b --- /dev/null +++ b/src/tests/functional/plugin/shared/include/execution_graph_tests/duplicate_inputs_outputs_names.hpp @@ -0,0 +1,15 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "gtest/gtest.h" + +namespace ExecutionGraphTests { + +class ExecGraphDuplicateInputsOutputsNames + : public testing::TestWithParam { +public: + static std::string getTestCaseName(testing::TestParamInfo obj); +}; + +} // namespace ExecutionGraphTests diff --git a/src/tests/functional/plugin/shared/src/execution_graph_tests/duplicate_inputs_outputs_names.cpp b/src/tests/functional/plugin/shared/src/execution_graph_tests/duplicate_inputs_outputs_names.cpp new file mode 100644 index 00000000000000..879675eeb3e201 --- /dev/null +++ b/src/tests/functional/plugin/shared/src/execution_graph_tests/duplicate_inputs_outputs_names.cpp @@ -0,0 +1,118 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "execution_graph_tests/duplicate_inputs_outputs_names.hpp" + +#include "functional_test_utils/skip_tests_config.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/add.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/runtime/core.hpp" + +namespace { + +constexpr char DUMMY_NAME[] = "dummy_name"; + +} // namespace + +namespace ExecutionGraphTests { + +std::string ExecGraphDuplicateInputsOutputsNames::getTestCaseName(testing::TestParamInfo obj) { + std::string targetDevice = obj.param; + return "Dev=" + targetDevice; +} + +/** + * Checks whether running predictions on a model containing duplicate names within its inputs/outputs yields the same + * result as when using unique names for the same architecture. 
+ */ +TEST_P(ExecGraphDuplicateInputsOutputsNames, CheckOutputsMatch) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + + const std::string device_name = this->GetParam(); + const ov::element::Type precision = ov::element::f32; + const ov::Shape shape = {3, 2}; + float input_data1[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + float input_data2[] = {2.0, 2.0, 2.0, 2.0, 2.0, 2.0}; + const ov::Tensor input_tensor1{precision, shape, input_data1}; + const ov::Tensor input_tensor2{precision, shape, input_data2}; + + // A simple graph with 2 inputs and 2 outputs + auto input1 = std::make_shared(precision, shape); + auto input2 = std::make_shared(precision, shape); + auto sum = std::make_shared(input1, input2); + auto mul = std::make_shared(input1, input2); + auto output1 = std::make_shared(sum->get_default_output()); + auto output2 = std::make_shared(mul->get_default_output()); + + // Set the same name for all inputs/outputs + input1->set_friendly_name(DUMMY_NAME); + input2->set_friendly_name(DUMMY_NAME); + input1->get_output_tensor(0).set_names({DUMMY_NAME}); + input2->get_output_tensor(0).set_names({DUMMY_NAME}); + + output1->set_friendly_name(DUMMY_NAME); + output2->set_friendly_name(DUMMY_NAME); + output1->get_input_tensor(0).set_names({DUMMY_NAME}); + output2->get_input_tensor(0).set_names({DUMMY_NAME}); + + auto model = std::make_shared(ov::ResultVector{output1, output2}, + ov::ParameterVector{input1, input2}, + "SimpleNetwork1"); + + // Load the plugin, compile the model and run a single prediction + auto core = ov::Core(); + ov::CompiledModel compiled_model_duplicate_names = core.compile_model(model, device_name); + ov::InferRequest inference_request_duplicate_names = compiled_model_duplicate_names.create_infer_request(); + + inference_request_duplicate_names.set_tensor(compiled_model_duplicate_names.input(0), input_tensor1); + inference_request_duplicate_names.set_tensor(compiled_model_duplicate_names.input(1), input_tensor2); + inference_request_duplicate_names.infer(); + + 
const ov::Tensor output_tensor1 = + inference_request_duplicate_names.get_tensor(compiled_model_duplicate_names.output(0)); + const ov::Tensor output_tensor2 = + inference_request_duplicate_names.get_tensor(compiled_model_duplicate_names.output(1)); + const float* output_buffer1 = output_tensor1.data(); + const float* output_buffer2 = output_tensor2.data(); + + // Rebuild the model using unique names for inputs/outputs + size_t name_index = 0; + input1->set_friendly_name(DUMMY_NAME + std::to_string(name_index++)); + input2->set_friendly_name(DUMMY_NAME + std::to_string(name_index++)); + input1->get_output_tensor(0).set_names({DUMMY_NAME + std::to_string(name_index++)}); + input2->get_output_tensor(0).set_names({DUMMY_NAME + std::to_string(name_index++)}); + + output1->set_friendly_name(DUMMY_NAME + std::to_string(name_index++)); + output2->set_friendly_name(DUMMY_NAME + std::to_string(name_index++)); + output1->get_input_tensor(0).set_names({DUMMY_NAME + std::to_string(name_index++)}); + output2->get_input_tensor(0).set_names({DUMMY_NAME + std::to_string(name_index)}); + + model = std::make_shared(ov::ResultVector{output1, output2}, + ov::ParameterVector{input1, input2}, + "SimpleNetwork2"); + + // Compile the new model and run a single prediction + ov::CompiledModel compiled_model_unique_names = core.compile_model(model, device_name); + ov::InferRequest inference_request_unique_names = compiled_model_unique_names.create_infer_request(); + + inference_request_unique_names.set_tensor(input1, input_tensor1); + inference_request_unique_names.set_tensor(input2, input_tensor2); + inference_request_unique_names.infer(); + + const ov::Tensor reference_tensor1 = + inference_request_unique_names.get_tensor(compiled_model_unique_names.output(0)); + const ov::Tensor reference_tensor2 = + inference_request_unique_names.get_tensor(compiled_model_unique_names.output(1)); + const float* reference_buffer1 = reference_tensor1.data(); + const float* reference_buffer2 = 
reference_tensor2.data(); + + // Both models are using the same architecture, thus the results should match + for (size_t element_index = 0; element_index < shape_size(shape); ++element_index) { + ASSERT_EQ(output_buffer1[element_index], reference_buffer1[element_index]); + ASSERT_EQ(output_buffer2[element_index], reference_buffer2[element_index]); + } +} + +} // namespace ExecutionGraphTests