[NPU] Switching the I/O identification convention to indices #24248
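For context, the PR replaces name-keyed I/O bookkeeping with index-ordered containers. A minimal sketch of the two conventions (hypothetical container and type names, not the PR's actual members):

```cpp
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

struct Tensor {};  // stand-in for ov::ITensor

// Old convention: tensors keyed by node name. Lookups require string hashing
// and break down when an input and an output legally share the same name.
std::unordered_map<std::string, std::shared_ptr<Tensor>> tensorsByName;

// New convention: tensors stored in the order their ports are registered in
// the model. The index is unique and stable by construction.
std::vector<std::shared_ptr<Tensor>> inputTensors;
std::vector<std::shared_ptr<Tensor>> outputTensors;

std::shared_ptr<Tensor> get_input(size_t index) {
    return inputTensors.at(index);  // throws std::out_of_range on a bad index
}
```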
New file (@@ -0,0 +1,18 @@):

```cpp
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "execution_graph_tests/duplicate_inputs_outputs_names.hpp"

#include "common_test_utils/test_constants.hpp"

using namespace ExecutionGraphTests;

namespace {

INSTANTIATE_TEST_SUITE_P(smoke_duplicateInputsOutputsNames,
                         ExecGraphDuplicateInputsOutputsNames,
                         ::testing::Values(ov::test::utils::DEVICE_CPU),
                         ExecGraphDuplicateInputsOutputsNames::getTestCaseName);

}  // namespace
```
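The CPU instantiation above registers a shared test covering models whose inputs and outputs carry colliding names, which is exactly the case where name-based identification fails. A hypothetical model of that shape (illustrative only, not the test's actual body):

```cpp
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/relu.hpp"
#include "openvino/op/result.hpp"

// Builds a model whose input and output nodes share the friendly name "data",
// so a name-keyed tensor map would conflate the two ports.
std::shared_ptr<ov::Model> make_duplicate_name_model() {
    auto param = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 3});
    auto relu = std::make_shared<ov::op::v0::Relu>(param);
    auto result = std::make_shared<ov::op::v0::Result>(relu);

    param->set_friendly_name("data");
    result->set_friendly_name("data");  // duplicates the input's name

    return std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{param});
}
```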
Modified file (class SyncInferRequest):

```diff
@@ -92,56 +92,32 @@ class SyncInferRequest : public ov::IInferRequest {
      */
     void initialize_states();
 
 protected:
-    /**
-     * @return The state tensors accessible by their names.
-     */
-    std::unordered_map<std::string, std::shared_ptr<VariableState>>& get_variable_states() {
-        return _variableStates;
-    }
-
-    /**
-     * @return The names used by the inputs in the order registered inside the model.
-     */
-    std::vector<std::string> get_input_names() {
-        return _metadata.inputNames;
-    }
-
-    /**
-     * @return The names used by the outputs in the order registered inside the model.
-     */
-    std::vector<std::string> get_output_names() {
-        return _metadata.outputNames;
-    }
-
-    /**
-     * @return The names used by the state variables in the order registered inside the model.
-     */
-    std::vector<std::string> get_state_names() {
-        return _metadata.stateNames;
-    }
-
-    /**
-     * @return The names used by the shape variables in the order registered inside the model.
-     */
-    std::vector<std::string> get_shape_names() {
-        return _metadata.shapeNames;
-    }
-
-    /**
-     * @return A map holding references towards all tensors used by the current inference request object.
-     */
-    std::unordered_map<std::string, std::shared_ptr<ov::ITensor>>& get_all_tensors() {
-        return _allTensors;
-    }
-
-    /**
-     * @return A map holding references towards all shapes tensors used by the current inference request object.
-     */
-    std::unordered_map<std::string, std::shared_ptr<ov::ITensor>>& get_shapes_tensors() {
-        return _shapesTensors;
-    }
+    /**
+     * @see ov::ISyncInferRequest
+     */
+    struct FoundPort {
+        size_t idx;
+        enum class Type { NOT_FOUND = 0, INPUT, OUTPUT } type;
+
+        bool found() {
+            return type != Type::NOT_FOUND;
+        }
+        bool is_input() {
+            return type == Type::INPUT;
+        }
+        bool is_output() {
+            return !is_input();
+        }
+    };
+
+    /**
+     * @brief Finds input or output port
+     * @return structure which contains index of Input/Output or report that port wasn't found
+     * @see ov::ISyncInferRequest
+     */
+    FoundPort find_port(const ov::Output<const ov::Node>& port) const;
 
     /**
      * @brief Basic checks for input/output tensor
      *
@@ -163,45 +139,40 @@ class SyncInferRequest : public ov::IInferRequest {
     virtual void check_network_precision(const ov::element::Type_t precision) const = 0;
 
-    /**
-     * @brief Indicates a kind of provided tensor. Marks special tensors, used for internal implementation
-     */
-    enum class TensorType { InputOrOutput, Shape, State };
-
     /**
-     * @brief Allocates a tensor on host and stores the reference inside the "_allTensors" attribute. If a buffer
-     * address is provided, then the tensor is built upon it and no additional data buffer is allocated.
-     * @param tensorName The name by which the tensor shall be identified
+     * @brief Allocates a tensor on host and stores the reference inside multiple attributes.
      * @param descriptor Tensor's metadata
-     * @param isState If true, the tensor shall also be stored inside the state variables map. In this case, adding the
-     * tensor to this structure would be required in order to correctly answer the state queries.
+     * @param index The index which the allocated tensor shall use.
+     * @param isInput Determines the containers in which the newly allocated tensors will be stored.
      * @param allocator If provided, the tensor uses the custom allocator instead of using the default one.
+     * @param batchSize If provided, the value of the shape on the 0th axis is overridden with this value.
+     * @return Pointer towards the allocated tensor
      */
-    void allocate_tensor(std::string tensorName,
-                         const IONodeDescriptor& descriptor,
-                         TensorType tensorType = TensorType::InputOrOutput,
-                         const ov::Allocator& allocator = {}) const;
-
-    // Mutable to return reference to ov::Tensor
-    mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _allTensors;
-    mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _shapesTensors;
-    // A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another
-    // memory area for the tensor.
-    mutable std::unordered_map<std::string, std::shared_ptr<ov::ITensor>> _copyAllTensors;
-
-    mutable std::unordered_map<std::string, std::shared_ptr<VariableState>> _variableStates;
+    std::shared_ptr<ov::ITensor> allocate_tensor(const IODescriptor& descriptor,
+                                                 const size_t index,
+                                                 const bool isInput,
+                                                 const ov::Allocator& allocator = {},
+                                                 const std::optional<std::size_t> batchSize = std::nullopt) const;
 
     // This is intel_npu::ICompiledModel pointer, but need to use OV base class because
     // ov::IInferRequest::get_compiled_model returns a reference to shared_ptr!
     std::shared_ptr<const ov::ICompiledModel> _compiledModel;
 
     NetworkMetadata _metadata;
 
-    // Stored in order to avoid additional processing when launching inferences
-    std::vector<std::string> _inputAndStateInputNames;
-    std::vector<std::string> _outputAndStateOutputNames;
+    mutable std::vector<std::shared_ptr<ov::ITensor>> _userInputTensors;
+    mutable std::vector<std::shared_ptr<ov::ITensor>> _userOutputTensors;
 
-    std::unordered_map<std::string, std::string> _nodeNameToLegacyName;
-    std::unordered_map<std::string, std::string> _legacyNameToNodeName;
+    mutable std::vector<ov::SoPtr<ov::IVariableState>> _variableStates;
+
+    /**
+     * @see ov::ISyncInferRequest
+     */
+    mutable std::unordered_map<size_t, FoundPort> _cachedPorts;
+
+    /**
+     * @see ov::ISyncInferRequest
+     */
+    mutable std::mutex _cacheMutex;
 };
 
 }  // namespace intel_npu
```

Review thread on the `find_port` declaration:

Reviewer: Do we expect this to be used in IMD? If not let's consider moving into ZeroInferRequest
Author: Yeah, it is essential for IMD too. The method is used inside the …
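The diff above only shows the `find_port` declaration. A hedged sketch of how the cached lookup could work, using the members declared in the header (the body is illustrative, not the PR's actual implementation):

```cpp
// Illustrative sketch only. Assumes the members declared above
// (_compiledModel, _cachedPorts, _cacheMutex) and hashes the node pointer for
// brevity; the real implementation may key the cache differently.
SyncInferRequest::FoundPort SyncInferRequest::find_port(const ov::Output<const ov::Node>& port) const {
    const size_t portHash = std::hash<const ov::Node*>()(port.get_node()) ^ port.get_index();

    std::lock_guard<std::mutex> lock(_cacheMutex);
    const auto cached = _cachedPorts.find(portHash);
    if (cached != _cachedPorts.end()) {
        return cached->second;  // hit: repeated set_tensor/get_tensor calls skip the scan
    }

    FoundPort found{0, FoundPort::Type::NOT_FOUND};

    // Linear scan over the compiled model's ports; the matching position is
    // exactly the index used by the new identification convention.
    const auto& inputs = _compiledModel->inputs();
    for (size_t i = 0; i < inputs.size(); ++i) {
        if (inputs[i] == port) {
            found = {i, FoundPort::Type::INPUT};
            break;
        }
    }
    if (!found.found()) {
        const auto& outputs = _compiledModel->outputs();
        for (size_t i = 0; i < outputs.size(); ++i) {
            if (outputs[i] == port) {
                found = {i, FoundPort::Type::OUTPUT};
                break;
            }
        }
    }

    _cachedPorts[portHash] = found;
    return found;
}
```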
New file (@@ -0,0 +1,69 @@):

```cpp
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "intel_npu/al/icompiler.hpp"

namespace intel_npu {

std::optional<size_t> NetworkMetadata::findByName(const std::vector<IODescriptor>& descriptors,
                                                  const std::string_view targetName) {
    for (size_t descriptorIndex = 0; descriptorIndex < descriptors.size(); ++descriptorIndex) {
        if (descriptors.at(descriptorIndex).nameFromCompiler == targetName) {
            return descriptorIndex;
        }
    }

    return std::nullopt;
}

void NetworkMetadata::bindRelatedDescriptors() {
    size_t ioIndex = 0;

    for (IODescriptor& input : inputs) {
        if (input.relatedDescriptorIndex.has_value()) {
            ++ioIndex;
            continue;
        }

        if (input.isStateInput) {
            const std::optional<size_t> relatedDescriptorIndex = findByName(outputs, input.nameFromCompiler);

            if (relatedDescriptorIndex.has_value()) {
                input.relatedDescriptorIndex = relatedDescriptorIndex;
                outputs.at(*relatedDescriptorIndex).relatedDescriptorIndex = std::optional(ioIndex);
            }
        } else if (input.isShapeTensor) {
            const std::optional<size_t> relatedDescriptorIndex = findByName(inputs, input.nameFromCompiler);

            if (relatedDescriptorIndex.has_value() && *relatedDescriptorIndex != ioIndex) {
                input.relatedDescriptorIndex = relatedDescriptorIndex;
                inputs.at(*relatedDescriptorIndex).relatedDescriptorIndex = std::optional(ioIndex);
            }
        }

        ++ioIndex;
    }

    ioIndex = 0;

    for (IODescriptor& output : outputs) {
        if (output.relatedDescriptorIndex.has_value()) {
            ++ioIndex;
            continue;
        }

        if (output.isShapeTensor) {
            const std::optional<size_t> relatedDescriptorIndex = findByName(outputs, output.nameFromCompiler);

            if (relatedDescriptorIndex.has_value() && *relatedDescriptorIndex != ioIndex) {
                output.relatedDescriptorIndex = relatedDescriptorIndex;
                outputs.at(*relatedDescriptorIndex).relatedDescriptorIndex = std::optional(ioIndex);
            }
        }

        ++ioIndex;
    }
}

}  // namespace intel_npu
```
Review comment on this file: Most of the changes snowballed from here.
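To make the pairing concrete, here is a small self-contained sketch of the state-input branch of `bindRelatedDescriptors()`, with `IODescriptor` trimmed to the fields the function actually touches (the example data is hypothetical):

```cpp
#include <cassert>
#include <optional>
#include <string>
#include <string_view>
#include <vector>

// Trimmed-down stand-in for intel_npu::IODescriptor: only the fields
// exercised by the pairing logic are modelled here.
struct IODescriptor {
    std::string nameFromCompiler;
    bool isStateInput = false;
    std::optional<size_t> relatedDescriptorIndex;
};

// Same linear search as NetworkMetadata::findByName above.
std::optional<size_t> findByName(const std::vector<IODescriptor>& descriptors, std::string_view name) {
    for (size_t i = 0; i < descriptors.size(); ++i) {
        if (descriptors[i].nameFromCompiler == name) {
            return i;
        }
    }
    return std::nullopt;
}

int main() {
    // One regular input plus one state input whose counterpart output shares
    // the compiler-assigned name, as with an LSTM cell state.
    std::vector<IODescriptor> inputs = {{"data"}, {"cell_state", true}};
    std::vector<IODescriptor> outputs = {{"prob"}, {"cell_state"}};

    // The state-input branch of bindRelatedDescriptors(), reduced to its core:
    // link the pair by index in both directions.
    for (size_t ioIndex = 0; ioIndex < inputs.size(); ++ioIndex) {
        if (inputs[ioIndex].isStateInput) {
            if (const auto related = findByName(outputs, inputs[ioIndex].nameFromCompiler)) {
                inputs[ioIndex].relatedDescriptorIndex = related;
                outputs[*related].relatedDescriptorIndex = ioIndex;
            }
        }
    }

    assert(inputs[1].relatedDescriptorIndex == 1);   // points at outputs[1]
    assert(outputs[1].relatedDescriptorIndex == 1);  // points back at inputs[1]
}
```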