Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eisw 121295 indices as ids poc backup #24

Closed
Changes from 1 commit
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
e1211f3
Renaming the I/O descriptor structure
razvanapetroaie Apr 25, 2024
74871bb
Using indices for serializing the I/O metadata when the compiler vers…
razvanapetroaie May 1, 2024
74a3764
Reintroducing an additional shape attribute inside the "IODescriptor"
razvanapetroaie May 1, 2024
23710dd
Updating the I/O metadata extraction performed inside the plugin-driv…
razvanapetroaie May 2, 2024
ac73acd
Refactoring the "SyncInferRequest" class
razvanapetroaie May 2, 2024
64ba3cd
Refactoring the Level Zero backend
razvanapetroaie May 2, 2024
aa19290
Adding back support for stateful and dynamic models
razvanapetroaie May 7, 2024
d3612aa
Refactoring the "checkLevelZeroAttributesMatch" function
razvanapetroaie May 8, 2024
64fe760
Fixing the accuracy issues
razvanapetroaie May 8, 2024
3860ffc
Removing a couple of unused functions
razvanapetroaie May 8, 2024
59a019c
Adding more comments
razvanapetroaie May 9, 2024
e3157ea
Constraining the input/output entries inside the dummy OV model const…
razvanapetroaie May 12, 2024
81f52d3
Fixing the batching implementation
razvanapetroaie May 13, 2024
db634de
Fixing the "getBatchSize" function
razvanapetroaie May 13, 2024
7997aa0
Removing some unused code passages
razvanapetroaie May 15, 2024
7b0a66a
Restoring the optional "shapeFromIRModel" due to potential driver bug
razvanapetroaie May 20, 2024
2f59b32
Adding an extra log message in the batching implementation
razvanapetroaie May 20, 2024
5a502be
Adding more comments
razvanapetroaie May 20, 2024
4432a71
Adding a test checking whether models using duplicate node names work…
razvanapetroaie May 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Restoring the optional "shapeFromIRModel" due to potential driver bug
razvanapetroaie committed May 27, 2024
commit 7b0a66af3d3e047d339e55ca3cf45f6467b9d9a0
Original file line number Diff line number Diff line change
@@ -100,7 +100,7 @@ struct IODescriptor {
* This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added
* by the compiler).
*/
ov::PartialShape shapeFromIRModel;
std::optional<ov::PartialShape> shapeFromIRModel;
};

struct NetworkMetadata final {
12 changes: 10 additions & 2 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
Original file line number Diff line number Diff line change
@@ -65,7 +65,12 @@ void checkLevelZeroAttributesMatch(const IODescriptor& ioDescriptor,
} // namespace

size_t ZeroInferRequest::getBatchSize(const NetworkMetadata& metadata) {
const ov::PartialShape& firstOutputShape = metadata.outputs.at(0).shapeFromIRModel;
if (!metadata.outputs.at(0).shapeFromIRModel.has_value()) {
_logger.info("Batching on the plugin is not used, batching is handled by the compiler");
return DEFAULT_BATCH_SIZE;
}

const ov::PartialShape& firstOutputShape = *metadata.outputs.at(0).shapeFromIRModel;
if (firstOutputShape.is_dynamic()) {
_logger.info("Networks using dynamic shapes are not supported when batching is handled by the plugin");
return DEFAULT_BATCH_SIZE;
@@ -84,8 +89,11 @@ size_t ZeroInferRequest::getBatchSize(const NetworkMetadata& metadata) {

auto checkDescriptorsUseCandidateBatchSize = [candidateBatchSize](const std::vector<IODescriptor>& descriptors) {
for (const IODescriptor& descriptor : descriptors) {
OPENVINO_ASSERT(descriptor.shapeFromIRModel.has_value(),
"Missing value for the \"shapeFromIRModel\" attribute, I/O descriptor");

const ov::PartialShape& shapeFromCompiler = descriptor.shapeFromCompiler;
const ov::PartialShape& shapeFromIRModel = descriptor.shapeFromIRModel;
const ov::PartialShape& shapeFromIRModel = *descriptor.shapeFromIRModel;

if (shapeFromCompiler.is_dynamic() || shapeFromCompiler.rank().get_length() == 0 ||
*shapeFromCompiler.begin() != DEFAULT_BATCH_SIZE) {
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@ namespace driverCompilerAdapter {
std::is_same<T, ze_graph_dditable_ext_1_4_t>::value)

// For ext version >= 1.6, originalShape is available
#define NotSupportOriginalShape(T) \
#define NotSupportArgumentMetadata(T) \
(std::is_same<T, ze_graph_dditable_ext_1_2_t>::value || std::is_same<T, ze_graph_dditable_ext_1_3_t>::value || \
std::is_same<T, ze_graph_dditable_ext_1_4_t>::value || std::is_same<T, ze_graph_dditable_ext_1_5_t>::value)

@@ -86,6 +86,14 @@ class LevelZeroCompilerInDriver final : public IExternalCompiler {
std::vector<uint8_t> serializeIR(IR& irModel, ze_graph_compiler_version_info_t compilerVersion) const;
std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t& compilerVersion) const;

template <typename T = TableExtension, typename std::enable_if_t<NotSupportArgumentMetadata(T), bool> = true>
void getMetadata(TableExtension* graphDdiTableExt,
ze_graph_handle_t graphHandle,
uint32_t index,
std::vector<IODescriptor>& inputs,
std::vector<IODescriptor>& outputs) const;

template <typename T = TableExtension, typename std::enable_if_t<!NotSupportArgumentMetadata(T), bool> = true>
void getMetadata(TableExtension* graphDdiTableExt,
ze_graph_handle_t graphHandle,
uint32_t index,
Original file line number Diff line number Diff line change
@@ -881,10 +881,42 @@ static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
std::nullopt,
arg.debug_friendly_name,
std::move(outputTensorNames),
std::move(shapeFromIRModel)};
std::optional(shapeFromIRModel)};
}

template <typename TableExtension>
template <typename T, std::enable_if_t<NotSupportArgumentMetadata(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getMetadata(TableExtension* graphDdiTableExt,
                                                            ze_graph_handle_t graphHandle,
                                                            uint32_t index,
                                                            std::vector<IODescriptor>& inputs,
                                                            std::vector<IODescriptor>& outputs) const {
    // Fallback path for driver extension tables that do not expose the argument-metadata query
    // (see "NotSupportArgumentMetadata"): only "pfnGetArgumentProperties3" is consulted here.
    ze_graph_argument_properties_3_t properties;
    const auto status = graphDdiTableExt->pfnGetArgumentProperties3(graphHandle, index, &properties);
    if (status != ZE_RESULT_SUCCESS) {
        OPENVINO_THROW("L0 pfnGetArgumentProperties3",
                       " result: ",
                       ze_result_to_string(status),
                       ", code 0x",
                       std::hex,
                       uint64_t(status));
    }

    // Route the descriptor into the matching I/O collection. "std::nullopt" marks the absence of
    // IR-model metadata, which this extension version cannot provide.
    switch (properties.type) {
    case ZE_GRAPH_ARGUMENT_TYPE_INPUT:
        inputs.push_back(getIODescriptor(properties, std::nullopt));
        break;
    case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT:
        outputs.push_back(getIODescriptor(properties, std::nullopt));
        break;
    default:
        OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ",
                       properties.type);
    }
}

template <typename TableExtension>
template <typename T, std::enable_if_t<!NotSupportArgumentMetadata(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getMetadata(TableExtension* graphDdiTableExt,
ze_graph_handle_t graphHandle,
uint32_t index,
15 changes: 9 additions & 6 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
@@ -46,8 +46,10 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
continue;
}

std::shared_ptr<ov::op::v0::Parameter> parameter =
std::make_shared<ov::op::v0::Parameter>(inputDescriptor.precision, inputDescriptor.shapeFromIRModel);
std::shared_ptr<ov::op::v0::Parameter> parameter = std::make_shared<ov::op::v0::Parameter>(
inputDescriptor.precision,
inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
: inputDescriptor.shapeFromCompiler);

parameter->set_friendly_name(inputDescriptor.nodeFriendlyName);
parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames);
@@ -67,10 +69,11 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
std::shared_ptr<ov::Node> constantDummy =
std::make_shared<ov::op::v0::Constant>(outputDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE);

const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy =
std::make_shared<ov::descriptor::Tensor>(outputDescriptor.precision,
outputDescriptor.shapeFromIRModel,
outputDescriptor.outputTensorNames);
const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy = std::make_shared<ov::descriptor::Tensor>(
outputDescriptor.precision,
outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
: outputDescriptor.shapeFromCompiler,
outputDescriptor.outputTensorNames);

std::shared_ptr<ov::Node> result = std::make_shared<ov::op::v0::Result>(constantDummy);
result->output(0).set_tensor_ptr(tensorDummy);