Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eisw 121295 indices as ids poc backup #24

Closed
Changes from 1 commit
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
e1211f3
Renaming the I/O descriptor structure
razvanapetroaie Apr 25, 2024
74871bb
Using indices for serializing the I/O metadata when the compiler vers…
razvanapetroaie May 1, 2024
74a3764
Reintroducing an additional shape attribute inside the "IODescriptor"
razvanapetroaie May 1, 2024
23710dd
Updating the I/O metadata extraction performed inside the plugin-driv…
razvanapetroaie May 2, 2024
ac73acd
Refactoring the "SyncInferRequest" class
razvanapetroaie May 2, 2024
64ba3cd
Refactoring the Level Zero backend
razvanapetroaie May 2, 2024
aa19290
Adding back support for stateful and dynamic models
razvanapetroaie May 7, 2024
d3612aa
Refactoring the "checkLevelZeroAttributesMatch" function
razvanapetroaie May 8, 2024
64fe760
Fixing the accuracy issues
razvanapetroaie May 8, 2024
3860ffc
Removing a couple of unused functions
razvanapetroaie May 8, 2024
59a019c
Adding more comments
razvanapetroaie May 9, 2024
e3157ea
Constraining the input/output entries inside the dummy OV model const…
razvanapetroaie May 12, 2024
81f52d3
Fixing the batching implementation
razvanapetroaie May 13, 2024
db634de
Fixing the "getBatchSize" function
razvanapetroaie May 13, 2024
7997aa0
Removing some unused code passages
razvanapetroaie May 15, 2024
7b0a66a
Restoring the optional "shapeFromIRModel" due to potential driver bug
razvanapetroaie May 20, 2024
2f59b32
Adding an extra log message in the batching implementation
razvanapetroaie May 20, 2024
5a502be
Adding more comments
razvanapetroaie May 20, 2024
4432a71
Adding a test checking whether models using duplicate node names work…
razvanapetroaie May 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Restoring the optional "shapeFromIRModel" due to potential driver bug
razvanapetroaie committed May 27, 2024
commit 7b0a66af3d3e047d339e55ca3cf45f6467b9d9a0
Original file line number Diff line number Diff line change
@@ -100,7 +100,7 @@ struct IODescriptor {
* This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added
* by the compiler).
*/
ov::PartialShape shapeFromIRModel;
std::optional<ov::PartialShape> shapeFromIRModel;
};

struct NetworkMetadata final {
12 changes: 10 additions & 2 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
Original file line number Diff line number Diff line change
@@ -65,7 +65,12 @@ void checkLevelZeroAttributesMatch(const IODescriptor& ioDescriptor,
} // namespace

size_t ZeroInferRequest::getBatchSize(const NetworkMetadata& metadata) {
const ov::PartialShape& firstOutputShape = metadata.outputs.at(0).shapeFromIRModel;
if (!metadata.outputs.at(0).shapeFromIRModel.has_value()) {
_logger.info("Batching on the plugin is not used, batching is handled by the compiler");
return DEFAULT_BATCH_SIZE;
}

const ov::PartialShape& firstOutputShape = *metadata.outputs.at(0).shapeFromIRModel;
if (firstOutputShape.is_dynamic()) {
_logger.info("Networks using dynamic shapes are not supported when batching is handled by the plugin");
return DEFAULT_BATCH_SIZE;
@@ -84,8 +89,11 @@ size_t ZeroInferRequest::getBatchSize(const NetworkMetadata& metadata) {

auto checkDescriptorsUseCandidateBatchSize = [candidateBatchSize](const std::vector<IODescriptor>& descriptors) {
for (const IODescriptor& descriptor : descriptors) {
OPENVINO_ASSERT(descriptor.shapeFromIRModel.has_value(),
"Missing value for the \"shapeFromIRModel\" attribute, I/O descriptor");

const ov::PartialShape& shapeFromCompiler = descriptor.shapeFromCompiler;
const ov::PartialShape& shapeFromIRModel = descriptor.shapeFromIRModel;
const ov::PartialShape& shapeFromIRModel = *descriptor.shapeFromIRModel;

if (shapeFromCompiler.is_dynamic() || shapeFromCompiler.rank().get_length() == 0 ||
*shapeFromCompiler.begin() != DEFAULT_BATCH_SIZE) {
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@ namespace driverCompilerAdapter {
std::is_same<T, ze_graph_dditable_ext_1_4_t>::value)

// For ext version >= 1.6, originalShape is available
#define NotSupportOriginalShape(T) \
#define NotSupportArgumentMetadata(T) \
(std::is_same<T, ze_graph_dditable_ext_1_2_t>::value || std::is_same<T, ze_graph_dditable_ext_1_3_t>::value || \
std::is_same<T, ze_graph_dditable_ext_1_4_t>::value || std::is_same<T, ze_graph_dditable_ext_1_5_t>::value)

@@ -86,6 +86,14 @@ class LevelZeroCompilerInDriver final : public IExternalCompiler {
std::vector<uint8_t> serializeIR(IR& irModel, ze_graph_compiler_version_info_t compilerVersion) const;
std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t& compilerVersion) const;

template <typename T = TableExtension, typename std::enable_if_t<NotSupportArgumentMetadata(T), bool> = true>
void getMetadata(TableExtension* graphDdiTableExt,
ze_graph_handle_t graphHandle,
uint32_t index,
std::vector<IODescriptor>& inputs,
std::vector<IODescriptor>& outputs) const;

template <typename T = TableExtension, typename std::enable_if_t<!NotSupportArgumentMetadata(T), bool> = true>
void getMetadata(TableExtension* graphDdiTableExt,
ze_graph_handle_t graphHandle,
uint32_t index,
Original file line number Diff line number Diff line change
@@ -881,10 +881,42 @@ static IODescriptor getIODescriptor(const ze_graph_argument_properties_3_t& arg,
std::nullopt,
arg.debug_friendly_name,
std::move(outputTensorNames),
std::move(shapeFromIRModel)};
std::optional(shapeFromIRModel)};
}

template <typename TableExtension>
template <typename T, std::enable_if_t<NotSupportArgumentMetadata(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getMetadata(TableExtension* graphDdiTableExt,
                                                            ze_graph_handle_t graphHandle,
                                                            uint32_t index,
                                                            std::vector<IODescriptor>& inputs,
                                                            std::vector<IODescriptor>& outputs) const {
    // Fallback path for driver extension tables that do not expose the argument-metadata query
    // (see "NotSupportArgumentMetadata"): only "pfnGetArgumentProperties3" is consulted here.
    ze_graph_argument_properties_3_t properties;
    const auto status = graphDdiTableExt->pfnGetArgumentProperties3(graphHandle, index, &properties);
    if (status != ZE_RESULT_SUCCESS) {
        OPENVINO_THROW("L0 pfnGetArgumentProperties3",
                       " result: ",
                       ze_result_to_string(status),
                       ", code 0x",
                       std::hex,
                       uint64_t(status));
    }

    // Route the descriptor into the matching I/O collection. "std::nullopt" marks the absence of
    // IR-model metadata, which this extension version cannot provide.
    switch (properties.type) {
    case ZE_GRAPH_ARGUMENT_TYPE_INPUT:
        inputs.push_back(getIODescriptor(properties, std::nullopt));
        break;
    case ZE_GRAPH_ARGUMENT_TYPE_OUTPUT:
        outputs.push_back(getIODescriptor(properties, std::nullopt));
        break;
    default:
        OPENVINO_THROW("Invalid ze_graph_argument_type_t found in ze_graph_argument_properties_3_t object: ",
                       properties.type);
    }
}

template <typename TableExtension>
template <typename T, std::enable_if_t<!NotSupportArgumentMetadata(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getMetadata(TableExtension* graphDdiTableExt,
ze_graph_handle_t graphHandle,
uint32_t index,
15 changes: 9 additions & 6 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
@@ -46,8 +46,10 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
continue;
}

std::shared_ptr<ov::op::v0::Parameter> parameter =
std::make_shared<ov::op::v0::Parameter>(inputDescriptor.precision, inputDescriptor.shapeFromIRModel);
std::shared_ptr<ov::op::v0::Parameter> parameter = std::make_shared<ov::op::v0::Parameter>(
inputDescriptor.precision,
inputDescriptor.shapeFromIRModel.has_value() ? *inputDescriptor.shapeFromIRModel
: inputDescriptor.shapeFromCompiler);

parameter->set_friendly_name(inputDescriptor.nodeFriendlyName);
parameter->output(0).get_tensor().set_names(inputDescriptor.outputTensorNames);
@@ -67,10 +69,11 @@ std::shared_ptr<ov::Model> create_dummy_model(const std::vector<IODescriptor>& i
std::shared_ptr<ov::Node> constantDummy =
std::make_shared<ov::op::v0::Constant>(outputDescriptor.precision, CONSTANT_NODE_DUMMY_SHAPE);

const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy =
std::make_shared<ov::descriptor::Tensor>(outputDescriptor.precision,
outputDescriptor.shapeFromIRModel,
outputDescriptor.outputTensorNames);
const std::shared_ptr<ov::descriptor::Tensor>& tensorDummy = std::make_shared<ov::descriptor::Tensor>(
outputDescriptor.precision,
outputDescriptor.shapeFromIRModel.has_value() ? *outputDescriptor.shapeFromIRModel
: outputDescriptor.shapeFromCompiler,
outputDescriptor.outputTensorNames);

std::shared_ptr<ov::Node> result = std::make_shared<ov::op::v0::Result>(constantDummy);
result->output(0).set_tensor_ptr(tensorDummy);