Skip to content

Commit

Permalink
[NPU] Switching the I/O identification convention to indices (openvin…
Browse files Browse the repository at this point in the history
…otoolkit#24248)

### Details:
- Please see PR#10348 (compiler repository) for a detailed description
and some extra validation.

### Tickets:
 - *CVS-142751*
  • Loading branch information
razvanapetroaie authored Aug 12, 2024
1 parent 033a515 commit e567c9e
Show file tree
Hide file tree
Showing 22 changed files with 1,098 additions and 902 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "execution_graph_tests/duplicate_inputs_outputs_names.hpp"

#include "common_test_utils/test_constants.hpp"

using namespace ExecutionGraphTests;

namespace {

// Registers ExecGraphDuplicateInputsOutputsNames for the CPU device.
// Judging by the suite name, this presumably exercises models whose inputs/outputs
// carry duplicate names — confirm against duplicate_inputs_outputs_names.hpp.
// getTestCaseName produces the human-readable parameterized test name.
INSTANTIATE_TEST_SUITE_P(smoke_duplicateInputsOutputsNames,
ExecGraphDuplicateInputsOutputsNames,
::testing::Values(ov::test::utils::DEVICE_CPU),
ExecGraphDuplicateInputsOutputsNames::getTestCaseName);

} // namespace
124 changes: 92 additions & 32 deletions src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <memory>
#include <set>
#include <string>
#include <string_view>
#include <unordered_map>
#include <unordered_set>

Expand All @@ -22,48 +23,107 @@
namespace intel_npu {

/**
* @brief A helper structure used for storing the metadata found within the I/O nodes.
* @details The "legacyName" attribute holds the name most commonly used as map key for multiple structures.
* This value also corresponds to the identifier used by the OpenVINO 1.0 API.
*
* "originalShape" corresponds to the shape registered in the graph, while "transposedShape" holds the shape obtained
* upon applying a transposition corresponding to the legacy layout value. Use the "transposedShape" one if not sure
* which one you need.
* @brief A helper structure used for storing metadata corresponding to one input/output entry.
*/
struct IONodeDescriptor {
std::string legacyName;
std::string currentNodeName;
struct IODescriptor {
/**
* @brief The name of the input/output assigned by the compiler.
* @details This value may differ from other name attributes:
* - The compiler could have created additional inputs/outputs (e.g. for representing states). These are not
* found in the original IR model.
* - The compiler may append indices to names in the case where duplicate names are found.
* @note The prefixes introduced by the compiler in order to differentiate the special cases (e.g. states and shape
* tensors) were removed prior to initializing this field.
*/
std::string nameFromCompiler;

ov::element::Type precision;

ov::PartialShape shapeFromCompiler;

/**
* @brief If set to "true", the current object describes a buffer which may be used for altering a state tensor.
* @details This flag is set if the compiler prefixed the name using a "read value" prefix. The state input and
* state output descriptors are also tied using the "relatedDescriptorIndex" attribute.
*/
bool isStateInput = false;

/**
* @brief If set to "true", the current object describes a buffer which reflects the value of a state tensor.
* @details This flag is set if the compiler prefixed the name using an "assign" prefix. The state input and
* state output descriptors are also tied using the "relatedDescriptorIndex" attribute.
*/
bool isStateOutput = false;

/**
* @brief If set to "true", the buffer of the tensor described here contains as value the shape of the referenced
* tensor.
* @details This flag is set if the compiler prefixed the name using a "shape" prefix.
*
* The referenced tensor bears the same name ("nameFromCompiler"), but its "isShapeTensor" value is set to
* "false". The two descriptors are also tied using the "relatedDescriptorIndex" attribute.
*/
bool isShapeTensor = false;

/**
* @brief Points towards a related descriptor.
* @details The related descriptors are defined by (state input, state output) or (dynamic tensor, shape tensor)
* pairs.
*/
std::optional<size_t> relatedDescriptorIndex;

/**
* @brief The friendly name of the node extracted from the IR model.
* @details In some cases, this field is required for constructing a dummy model which uses the same input/output
* metadata as the original IR model.
*
* This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the
* compiler).
*/
std::string nodeFriendlyName;

/**
* @brief The names of the output tensors extracted from the IR model.
* @details In some cases, this field is required for constructing a dummy model which uses the same input/output
* metadata as the original IR model.
*
* This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the
* compiler).
*/
std::unordered_set<std::string> outputTensorNames;
ov::element::Type_t precision;
ov::PartialShape originalShape;
ov::PartialShape transposedShape;
};

/**
* @brief A helper map to represent descriptions for inputs and outputs
* of a network
*/
using IONodeDescriptorMap = std::unordered_map<std::string, IONodeDescriptor>;
/**
* @brief The shape extracted from the IR model.
* @details The values may differ from the ones found in "shapeFromCompiler" if batching is to be handled by the
* plugin.
*
* This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added
* by the compiler).
*/
std::optional<ov::PartialShape> shapeFromIRModel = std::nullopt;
};

struct NetworkMetadata final {
std::string name;

std::vector<std::string> inputNames;
std::vector<std::string> outputNames;
std::vector<std::string> stateNames;
std::vector<std::string> shapeNames;
std::vector<IODescriptor> inputs;
std::vector<IODescriptor> outputs;
std::vector<IODescriptor> profilingOutputs;

IONodeDescriptorMap parameters;
IONodeDescriptorMap results;
IONodeDescriptorMap states;
IONodeDescriptorMap shapes;
IONodeDescriptorMap profilingOutputs;
size_t numStreams = 1;

std::unordered_map<std::string, size_t> inputOrder;
std::unordered_map<std::string, size_t> outputOrder;
/**
* @brief Binds the (state input, state output) and (dynamic tensor, shape tensor) pairs using the
* "relatedDescriptorIndex" attribute.
* @details For state inputs, the "relatedDescriptorIndex" value is set to the index of the output which bears the
* same name. The reverse is also applied.
*
* For shape tensors, the lookup is performed in the same container (inputs or outputs). The value is once again set
* to the index of the entry which bears the same name.
*/
void bindRelatedDescriptors();

int numStreams = 1;
};
}; // namespace intel_npu

/**
* @struct NetworkDescription
Expand Down
113 changes: 42 additions & 71 deletions src/plugins/intel_npu/src/al/include/sync_infer_request.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,56 +92,32 @@ class SyncInferRequest : public ov::IInferRequest {
*/
void initialize_states();

protected:
/**
* @return The state tensors accessible by their names.
*/
std::unordered_map<std::string, std::shared_ptr<VariableState>>& get_variable_states() {
return _variableStates;
}

/**
* @return The names used by the inputs in the order registered inside the model.
*/
std::vector<std::string> get_input_names() {
return _metadata.inputNames;
}

/**
* @return The names used by the outputs in the order registered inside the model.
*/
std::vector<std::string> get_output_names() {
return _metadata.outputNames;
}

/**
* @return The names used by the state variables in the order registered inside the model.
* @see ov::ISyncInferRequest
*/
std::vector<std::string> get_state_names() {
return _metadata.stateNames;
}
struct FoundPort {
size_t idx;
enum class Type { NOT_FOUND = 0, INPUT, OUTPUT } type;

/**
* @return The names used by the shape variables in the order registered inside the model.
*/
std::vector<std::string> get_shape_names() {
return _metadata.shapeNames;
}
bool found() {
return type != Type::NOT_FOUND;
}
bool is_input() {
return type == Type::INPUT;
}
bool is_output() {
return !is_input();
}
};

/**
* @return A map holding references towards all tensors used by the current inference request object.
* @brief Finds input or output port
* @return structure which contains index of Input/Output or report that port wasn't found
* @see ov::ISyncInferRequest
*/
std::unordered_map<std::string, std::shared_ptr<ov::ITensor>>& get_all_tensors() {
return _allTensors;
}
FoundPort find_port(const ov::Output<const ov::Node>& port) const;

/**
* @return A map holding references towards all shapes tensors used by the current inference request object.
*/
std::unordered_map<std::string, std::shared_ptr<ov::ITensor>>& get_shapes_tensors() {
return _shapesTensors;
}

protected:
/**
* @brief Basic checks for input/output tensor
*
Expand All @@ -163,45 +139,40 @@ class SyncInferRequest : public ov::IInferRequest {
virtual void check_network_precision(const ov::element::Type_t precision) const = 0;

/**
* @brief Indicates a kind of provided tensor. Marks special tensors, used for internal implementation
*/
enum class TensorType { InputOrOutput, Shape, State };

/**
* @brief Allocates a tensor on host and stores the reference inside the "_allTensors" attribute. If a buffer
* address is provided, then the tensor is built upon it and no additional data buffer is allocated.
* @param tensorName The name by which the tensor shall be identified
* @brief Allocates a tensor on host and stores the reference inside multiple attributes.
* @param descriptor Tensor's metadata
* @param isState If true, the tensor shall also be stored inside the state variables map. In this case, adding the
* tensor to this structure would be required in order to correctly answer the state queries.
* @param index The index which the allocated tensor shall use.
* @param isInput Determines the containers in which the newly allocated tensors will be stored.
* @param allocator If provided, the tensor uses the custom allocator instead of using the default one.
* @param batchSize If provided, the value of the shape on the 0th axis is overridden with this value.
* @return Pointer towards the allocated tensor
*/
void allocate_tensor(std::string tensorName,
const IONodeDescriptor& descriptor,
TensorType tensorType = TensorType::InputOrOutput,
const ov::Allocator& allocator = {}) const;

// Mutable to return reference to ov::Tensor
mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _allTensors;
mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _shapesTensors;
// A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another
// memory area for the tensor.
mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _copyAllTensors;

mutable std::unordered_map<std::string, std::shared_ptr<VariableState>> _variableStates;
std::shared_ptr<ov::ITensor> allocate_tensor(const IODescriptor& descriptor,
const size_t index,
const bool isInput,
const ov::Allocator& allocator = {},
const std::optional<std::size_t> batchSize = std::nullopt) const;

// This is intel_npu::ICompiledModel pointer, but need to use OV base class because
// ov::IInferRequest::get_compiled_model returns a reference to shared_ptr!
std::shared_ptr<const ov::ICompiledModel> _compiledModel;

NetworkMetadata _metadata;

// Stored in order to avoid additional processing when launching inferences
std::vector<std::string> _inputAndStateInputNames;
std::vector<std::string> _outputAndStateOutputNames;
mutable std::vector<std::shared_ptr<ov::ITensor>> _userInputTensors;
mutable std::vector<std::shared_ptr<ov::ITensor>> _userOutputTensors;

std::unordered_map<std::string, std::string> _nodeNameToLegacyName;
std::unordered_map<std::string, std::string> _legacyNameToNodeName;
mutable std::vector<ov::SoPtr<ov::IVariableState>> _variableStates;

/**
* @see ov::ISyncInferRequest
*/
mutable std::unordered_map<size_t, FoundPort> _cachedPorts;

/**
* @see ov::ISyncInferRequest
*/
mutable std::mutex _cacheMutex;
};

} // namespace intel_npu
67 changes: 67 additions & 0 deletions src/plugins/intel_npu/src/al/src/icompiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "intel_npu/al/icompiler.hpp"

#include <algorithm>
#include <iterator>

namespace intel_npu {

void NetworkMetadata::bindRelatedDescriptors() {
size_t ioIndex = 0;

for (IODescriptor& input : inputs) {
if (input.relatedDescriptorIndex.has_value()) {
++ioIndex;
continue;
}

if (input.isStateInput) {
const auto relatedDescriptorIterator =
std::find_if(outputs.begin(), outputs.end(), [&](const IODescriptor& output) {
return output.isStateOutput && (output.nameFromCompiler == input.nameFromCompiler);
});

if (relatedDescriptorIterator != outputs.end()) {
input.relatedDescriptorIndex = std::distance(outputs.begin(), relatedDescriptorIterator);
outputs.at(*input.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex;
}
} else if (input.isShapeTensor) {
const auto relatedDescriptorIterator =
std::find_if(inputs.begin(), inputs.end(), [&](const IODescriptor& candidate) {
return !candidate.isShapeTensor && (candidate.nameFromCompiler == input.nameFromCompiler);
});

if (relatedDescriptorIterator != inputs.end()) {
input.relatedDescriptorIndex = std::distance(inputs.begin(), relatedDescriptorIterator);
inputs.at(*input.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex;
}
}

++ioIndex;
}

ioIndex = 0;

for (IODescriptor& output : outputs) {
if (output.relatedDescriptorIndex.has_value()) {
++ioIndex;
continue;
}

if (output.isShapeTensor) {
const auto relatedDescriptorIterator =
std::find_if(outputs.begin(), outputs.end(), [&](const IODescriptor& candidate) {
return !candidate.isShapeTensor && (candidate.nameFromCompiler == output.nameFromCompiler);
});

if (relatedDescriptorIterator != outputs.end()) {
output.relatedDescriptorIndex = std::distance(outputs.begin(), relatedDescriptorIterator);
outputs.at(*output.relatedDescriptorIndex).relatedDescriptorIndex = ioIndex;
}
}

++ioIndex;
}
}

} // namespace intel_npu
Loading

0 comments on commit e567c9e

Please sign in to comment.