Commit

Merge branch 'master' into an/new_deconv_slt
allnes authored Dec 20, 2023
2 parents 149e6ff + 15e43c6 commit bc022e0
Showing 3 changed files with 35 additions and 5 deletions.
8 changes: 5 additions & 3 deletions src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
@@ -160,7 +160,7 @@ void SyncInferRequest::set_tensor(const ov::Output<const ov::Node>& port, const
     OPENVINO_ASSERT(tensor != nullptr, "[GPU] Failed to set empty tensor to port: \'", name, "\'");
     OPENVINO_ASSERT(port.get_element_type() == tensor->get_element_type(),
                     "[GPU] Mismatch tensor and port type: ", port.get_element_type(), " vs ", tensor->get_element_type());
-    OPENVINO_ASSERT(shape.compatible(ov::PartialShape(tensor->get_shape())) || tensor->get_shape() == ov::Shape{0},
+    OPENVINO_ASSERT(shape.compatible(ov::PartialShape(tensor->get_shape())) || tensor->get_shape() == ov::Shape {0} || port.get_partial_shape().is_dynamic(),
                     "[GPU] The tensor size is not equal to model, can't set input tensor with name: ",
                     name,
                     ", because model input (shape=",
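
The added port.get_partial_shape().is_dynamic() clause means set_tensor() no longer rejects a user tensor whose concrete shape fails the compatibility check, provided the port itself is dynamic. A minimal sketch of the call pattern this enables, assuming a hypothetical model.xml whose first input is reshaped to a fully dynamic rank-4 shape (mirroring the new functional test below):

    #include <map>
    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // hypothetical model path
        // Relax input 0 to a fully dynamic rank-4 shape.
        std::map<size_t, ov::PartialShape> shapes = {{0, ov::PartialShape{-1, -1, -1, -1}}};
        model->reshape(shapes);
        auto compiled = core.compile_model(model, "GPU");
        auto request = compiled.create_infer_request();

        // The port is dynamic, so any concrete shape now passes the relaxed assert.
        ov::Tensor user_input(ov::element::f32, {1, 3, 224, 224});
        request.set_input_tensor(user_input);
        return 0;
    }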
@@ -351,8 +351,8 @@ void SyncInferRequest::wait() {
         GPU_DEBUG_TRACE_DETAIL << name << " handle output tensor (host): " << output_tensor->data() << std::endl;
     }

-    OPENVINO_ASSERT(output_tensor_wrapper.owner == TensorOwner::PLUGIN || output_tensor_wrapper.actual_size >= output_memory->size(),
-                    "[GPU] Output tensor set by user has smaller size (", output_tensor->get_byte_size(), ") ",
+    OPENVINO_ASSERT(output_tensor_wrapper.owner == TensorOwner::PLUGIN || is_dynamic || output_tensor_wrapper.actual_size >= output_memory->size(),
+                    "[GPU] Output port is static and output tensor set by user has smaller size (", output_tensor->get_byte_size(), ") ",
                     "than required (", output_memory->size(), ")");

     bool need_output_update = output_layout.bytes_count() == 0 || (output_memory && output_tensor->get_byte_size() != output_memory->size());
@@ -371,6 +371,8 @@ void SyncInferRequest::wait() {
         auto usm_host_tensor = std::dynamic_pointer_cast<USMHostTensor>(output_tensor);
         if (usm_host_tensor && output_memory)
             need_reallocate = usm_host_tensor->get_impl()->get_original_memory()->size() < output_memory->size();
+        else if (!is_remote && output_memory)
+            need_reallocate = output_tensor_wrapper.actual_size < output_memory->size();

         if (need_reallocate) {
             auto actual_memory_shape = predict_shape(name, mem_shape, output_tensor->get_element_type(), *m_shape_predictor);
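
Taken together, the two wait() changes relax the output side in the same way: for a dynamic output port, a user tensor smaller than the produced result (including an empty one) is no longer a hard error; the plugin takes the reallocation path and, per the test comment further below, calls set_shape on the user tensor. Continuing the sketch above (same compiled and request; the shapes are illustrative, not from the commit):

        // Dynamic output: an empty user tensor is accepted and resized during wait().
        ov::Tensor user_output(compiled.output().get_element_type(), ov::Shape{0});
        request.set_output_tensor(user_output);
        request.infer();                                // previously could throw for undersized user tensors
        auto produced_shape = user_output.get_shape();  // now reflects the actual output shape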
24 changes: 24 additions & 0 deletions src/plugins/intel_gpu/tests/functional/behavior/infer_request.cpp
@@ -171,6 +171,30 @@ TEST(TensorTest, smoke_canSetTensorForDynamicInput) {
     ASSERT_NO_THROW(inf_req.infer());
 }

+TEST(TensorTest, smoke_canSetTensorForDynamicOutput) {
+    auto core = ov::Core();
+    using namespace ov::preprocess;
+    auto p = PrePostProcessor(ov::test::utils::make_split_multi_conv_concat());
+    p.input().tensor().set_element_type(ov::element::i8);
+    p.input().preprocess().convert_element_type(ov::element::f32);
+
+    auto function = p.build();
+    std::map<size_t, ov::PartialShape> shapes = { {0, ov::PartialShape{-1, -1, -1, -1}} };
+    function->reshape(shapes);
+    auto exec_net = core.compile_model(function, ov::test::utils::DEVICE_GPU);
+    auto inf_req = exec_net.create_infer_request();
+
+    ov::Tensor t1(ov::element::i8, {1, 4, 20, 20});
+    auto out_tensor = inf_req.get_output_tensor();
+    ov::Tensor t2(out_tensor.get_element_type(), out_tensor.get_shape());
+    ASSERT_EQ(t2.get_byte_size(), 0);
+    // Check set_shape call for pre-allocated input/output tensors
+    ASSERT_NO_THROW(inf_req.set_input_tensor(t1));
+    ASSERT_NO_THROW(inf_req.set_output_tensor(t2));
+    ASSERT_NO_THROW(inf_req.infer());
+    ASSERT_NE(t2.get_byte_size(), 0);
+}
+
 TEST(TensorTest, smoke_canReallocateDeviceInputForHostTensor) {
     auto ov = ov::Core();
     using namespace ov::preprocess;
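
The new smoke_canSetTensorForDynamicOutput test exercises exactly this path: t2 takes its shape from get_output_tensor() on a fully dynamic model, so it starts out with zero bytes, and the final ASSERT_NE confirms that infer() grew it to the real output size.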
@@ -667,12 +667,16 @@ TEST(OVRemoteTensorTests, smoke_MixedTensorTypes) {

     {
         // Keep same output, but use larger input
-        // In that case user tensor is not enough to store the result and the plugin throws exception
+        // In that case user tensor is not enough to store the result and set_shape will be called on the user
+        // tensor
         ov::Shape input_shape{1, 4, 32, 32};
+        ov::Shape output_shape_actual{1, 4, 32, 32};
         auto input_tensor = gpu_context.create_tensor(input->get_element_type(), input_shape);

         infer_request.set_tensor(input, input_tensor);
-        OV_EXPECT_THROW(infer_request.infer(), ov::Exception, HasSubstr("Output tensor set by user has smaller size"));
+        ASSERT_NO_THROW(infer_request.infer());
+
+        ASSERT_EQ(infer_request.get_output_tensor().get_shape(), output_shape_actual);
     }

     {
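
Note the behavioral change this test update captures: with the larger input, inference previously failed with "Output tensor set by user has smaller size", whereas the plugin now completes the request and reshapes the user's output tensor to the actual {1, 4, 32, 32} shape.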
