diff --git a/src/plugins/auto_batch/src/compiled_model.cpp b/src/plugins/auto_batch/src/compiled_model.cpp
index a52ae30a726064..867cf09680a2fc 100644
--- a/src/plugins/auto_batch/src/compiled_model.cpp
+++ b/src/plugins/auto_batch/src/compiled_model.cpp
@@ -13,8 +13,8 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
                              const std::shared_ptr<const ov::IPlugin>& plugin,
                              const ov::AnyMap& config,
                              const DeviceInformation& device_info,
-                             const std::set<std::string>& batched_inputs,
-                             const std::set<std::string>& batched_outputs,
+                             const std::set<std::size_t>& batched_inputs,
+                             const std::set<std::size_t>& batched_outputs,
                              const ov::SoPtr<ov::ICompiledModel>& compiled_model_with_batch,
                              const ov::SoPtr<ov::ICompiledModel>& compiled_model_without_batch,
                              const ov::SoPtr<ov::IRemoteContext>& context)
diff --git a/src/plugins/auto_batch/src/compiled_model.hpp b/src/plugins/auto_batch/src/compiled_model.hpp
index 245c0ea36ecc15..7934414ec809d0 100644
--- a/src/plugins/auto_batch/src/compiled_model.hpp
+++ b/src/plugins/auto_batch/src/compiled_model.hpp
@@ -36,8 +36,8 @@ class CompiledModel : public ov::ICompiledModel {
                   const std::shared_ptr<const ov::IPlugin>& plugin,
                   const ov::AnyMap& config,
                   const DeviceInformation& device_info,
-                  const std::set<std::string>& batched_inputs,
-                  const std::set<std::string>& batched_outputs,
+                  const std::set<std::size_t>& batched_inputs,
+                  const std::set<std::size_t>& batched_outputs,
                   const ov::SoPtr<ov::ICompiledModel>& compiled_model_with_batch,
                   const ov::SoPtr<ov::ICompiledModel>& compiled_model_without_batch,
                   const ov::SoPtr<ov::IRemoteContext>& context);
@@ -73,8 +73,8 @@ class CompiledModel : public ov::ICompiledModel {
     mutable std::atomic_size_t m_num_requests_created = {0};
     std::atomic<std::uint32_t> m_time_out = {0};  // in ms

-    const std::set<std::string> m_batched_inputs;
-    const std::set<std::string> m_batched_outputs;
+    const std::set<std::size_t> m_batched_inputs;
+    const std::set<std::size_t> m_batched_outputs;

     ov::SoPtr<ov::ICompiledModel> m_compiled_model_with_batch;
     ov::SoPtr<ov::ICompiledModel> m_compiled_model_without_batch;
diff --git a/src/plugins/auto_batch/src/plugin.cpp b/src/plugins/auto_batch/src/plugin.cpp
index 4024051f45c602..32bb6e7230de95 100644
--- a/src/plugins/auto_batch/src/plugin.cpp
+++ b/src/plugins/auto_batch/src/plugin.cpp
@@ -140,8 +140,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
     // avoid recursive auto-batching
     device_config_no_auto_batch[ov::hint::allow_auto_batching.name()] = false;

-    std::set<std::string> batched_inputs;
-    std::set<std::string> batched_outputs;
+    std::set<std::size_t> batched_inputs;
+    std::set<std::size_t> batched_outputs;
     // check that the auto-batching is applicable in general
     try {
         // if applicable, the Auto-Batching is implicitly enabled via the performance hints
@@ -172,8 +172,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
             const auto& static_shape = input->get_shape();
             if (static_shape[0] != 1)
                 OPENVINO_THROW("Auto-batching does not reshape/re-batch originally batched networks!");
-            batched_inputs.insert(
-                ov::op::util::get_ie_output_name(params[input_id]->output(0)));  // batched dim for the input
+            batched_inputs.insert(input_id);  // batched dim for the input
         } else {
             // if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch
             for (size_t s = 1; s < shape.size(); s++)
@@ -182,7 +181,9 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
                     "Auto-batching operates only networks with inputs/outputs batched by 0th dimension");
         }
     }
-    for (const auto& output : cloned_model->get_results()) {
+    const auto& results = cloned_model->get_results();
+    for (size_t output_id = 0; output_id < results.size(); output_id++) {
+        const auto& output = results[output_id];
         const auto& shape = output->get_output_partial_shape(0);
         if (shape.is_dynamic())
             OPENVINO_THROW("Auto-batching does not support dynamic networks!");
@@ -190,9 +191,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
         if (shape.size() && ov::DimensionTracker::get_label(shape[0])) {
             if (shape[0] != 1)
                 OPENVINO_THROW("Auto-batching does not reshape/re-batch originally batched networks!");
-            const auto& node = output->input_value(0);
-            batched_outputs.insert(
-                ov::op::util::get_ie_output_name(ov::Output<ov::Node>(node.get_node(), node.get_index())));
+            batched_outputs.insert(output_id);
         } else {
             // if the 0-th dim is not for the batch, then we support only the case when NONE dimension is batch
             for (size_t s = 1; s < shape.size(); s++)
@@ -266,13 +265,13 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
     if (meta_device.device_batch_size > 1 && batched_inputs.size()) {
         try {
             auto inputs = reshaped->inputs();
-            std::map<ov::Output<ov::Node>, ov::PartialShape> partial_shapes;
-            for (auto& input : inputs) {
-                auto input_shape = input.get_shape();
-                if (batched_inputs.find(ov::op::util::get_ie_output_name(input)) != batched_inputs.end()) {
+            std::map<size_t, ov::PartialShape> partial_shapes;
+            for (size_t input_id = 0; input_id < inputs.size(); input_id++) {
+                auto input_shape = inputs[input_id].get_shape();
+                if (batched_inputs.find(input_id) != batched_inputs.end()) {
                     input_shape[0] = meta_device.device_batch_size;
                 }
-                partial_shapes.insert({input, ov::PartialShape(input_shape)});
+                partial_shapes.insert({input_id, ov::PartialShape(input_shape)});
             }
             reshaped->reshape(partial_shapes);
diff --git a/src/plugins/auto_batch/src/sync_infer_request.cpp b/src/plugins/auto_batch/src/sync_infer_request.cpp
index 707adedc3b9bad..361d1b74daec02 100644
--- a/src/plugins/auto_batch/src/sync_infer_request.cpp
+++ b/src/plugins/auto_batch/src/sync_infer_request.cpp
@@ -13,15 +13,15 @@ namespace ov {
 namespace autobatch_plugin {

 inline ov::SoPtr<ov::ITensor> create_shared_tensor_on_batched_tensor(ov::SoPtr<ov::ITensor> batched_tensor,
-                                                                     std::string name,
-                                                                     const std::set<std::string>& batched_names,
+                                                                     std::size_t port,
+                                                                     const std::set<std::size_t>& batched_ports,
                                                                      size_t batch_id,
                                                                      size_t batch_num) {
     auto ptr = static_cast<uint8_t*>(batched_tensor->data());
     auto size_per_batch = batched_tensor->get_byte_size() / batch_num;
     auto batched_shape = batched_tensor->get_shape();
     // for performance reason (copy avoidance) current impl of the auto-batching supports only batching by 0th dim
-    if (batched_names.count(name)) {
+    if (batched_ports.count(port)) {
         batched_shape[0] = 1;
         return {ov::make_tensor(batched_tensor->get_element_type(), batched_shape, ptr + size_per_batch * batch_id),
                 batched_tensor._so};
@@ -35,8 +35,8 @@ SyncInferRequest::SyncInferRequest(
     const std::shared_ptr<ov::autobatch_plugin::CompiledModel::WorkerInferRequest>& worker_request,
     int batch_id,
     int num_batch,
-    const std::set<std::string>& batched_inputs,
-    const std::set<std::string>& batched_outputs)
+    const std::set<std::size_t>& batched_inputs,
+    const std::set<std::size_t>& batched_outputs)
     : ov::ISyncInferRequest(compiled_model),
       m_batched_request_wrapper(worker_request),
       m_batch_id(batch_id),
@@ -49,34 +49,33 @@ size_t SyncInferRequest::get_batch_size() const {
     return m_batch_size;
 }

-void SyncInferRequest::share_tensors_with_batched_req(const std::set<std::string>& batched_inputs,
-                                                      const std::set<std::string>& batched_outputs) {
-    for (const auto& it : get_inputs()) {
-        auto name = ov::op::util::get_ie_output_name(it);
+void SyncInferRequest::share_tensors_with_batched_req(const std::set<std::size_t>& batched_inputs,
+                                                      const std::set<std::size_t>& batched_outputs) {
+    const auto inputs = get_inputs();
+    for (size_t input_id = 0; input_id < inputs.size(); input_id++) {
+        const auto& input = inputs[input_id];
         ov::SoPtr<ov::ITensor> res;
-        auto batched_tensor = m_batched_request_wrapper->_infer_request_batched->get_tensor(it);
+        auto batched_tensor = m_batched_request_wrapper->_infer_request_batched->get_tensor(input);
         if (!batched_tensor._so)
             batched_tensor._so = m_batched_request_wrapper->_infer_request_batched._so;
-        res = create_shared_tensor_on_batched_tensor(batched_tensor,
-                                                     std::move(name),
-                                                     batched_inputs,
-                                                     m_batch_id,
-                                                     m_batch_size);
-        set_tensor(it, res);
+        res =
+            create_shared_tensor_on_batched_tensor(batched_tensor, input_id, batched_inputs, m_batch_id, m_batch_size);
+        set_tensor(input, res);
     }
-    for (const auto& it : get_outputs()) {
-        auto name = ov::op::util::get_ie_output_name(it.get_node_shared_ptr()->input_value(0));
+    const auto& outputs = get_outputs();
+    for (size_t output_id = 0; output_id < outputs.size(); output_id++) {
+        const auto& output = outputs[output_id];
         ov::SoPtr<ov::ITensor> res;
-        auto batched_tensor = m_batched_request_wrapper->_infer_request_batched->get_tensor(it);
+        auto batched_tensor = m_batched_request_wrapper->_infer_request_batched->get_tensor(output);
         if (!batched_tensor._so)
             batched_tensor._so = m_batched_request_wrapper->_infer_request_batched._so;
         res = create_shared_tensor_on_batched_tensor(batched_tensor,
-                                                     std::move(name),
+                                                     output_id,
                                                      batched_outputs,
                                                      m_batch_id,
                                                      m_batch_size);
-        set_tensor(it, res);
+        set_tensor(output, res);
     }
 }
diff --git a/src/plugins/auto_batch/src/sync_infer_request.hpp b/src/plugins/auto_batch/src/sync_infer_request.hpp
index 01b61f538a0bd7..f13222e9141c78 100644
--- a/src/plugins/auto_batch/src/sync_infer_request.hpp
+++ b/src/plugins/auto_batch/src/sync_infer_request.hpp
@@ -17,8 +17,8 @@ class SyncInferRequest : public ov::ISyncInferRequest {
                      const std::shared_ptr<ov::autobatch_plugin::CompiledModel::WorkerInferRequest>& worker_request,
                      int batch_id,
                      int num_batch,
-                     const std::set<std::string>& batched_inputs = {},
-                     const std::set<std::string>& batched_outputs = {});
+                     const std::set<std::size_t>& batched_inputs = {},
+                     const std::set<std::size_t>& batched_outputs = {});

     // Batch-Device impl specific: sets the data (blobs from the device request to the batched device request)
     void set_tensors_to_another_request(ov::SoPtr<ov::IAsyncInferRequest>& req);
@@ -48,8 +48,8 @@ class SyncInferRequest : public ov::ISyncInferRequest {
 protected:
     void copy_tensor_if_needed(const ov::SoPtr<ov::ITensor>& src, ov::SoPtr<ov::ITensor>& dst, const bool bInput);

-    void share_tensors_with_batched_req(const std::set<std::string>& batched_inputs,
-                                        const std::set<std::string>& batched_outputs);
+    void share_tensors_with_batched_req(const std::set<std::size_t>& batched_inputs,
+                                        const std::set<std::size_t>& batched_outputs);

     size_t m_batch_id;
diff --git a/src/plugins/auto_batch/tests/unit/async_infer_request_test.cpp b/src/plugins/auto_batch/tests/unit/async_infer_request_test.cpp
index a78d71f79b4c58..ef88ff5fc55eca 100644
--- a/src/plugins/auto_batch/tests/unit/async_infer_request_test.cpp
+++ b/src/plugins/auto_batch/tests/unit/async_infer_request_test.cpp
@@ -33,8 +33,8 @@ class AutoBatchAsyncInferRequestTest : public ::testing::TestWithParam<AutoBatchAsyncInferRequestTestParams> {
-    std::set<std::string> m_batched_inputs;
-    std::set<std::string> m_batched_outputs;
+    std::set<std::size_t> m_batched_inputs;
+    std::set<std::size_t> m_batched_outputs;
     ov::SoPtr<ov::IRemoteContext> m_remote_context;
     std::shared_ptr<CompiledModel> m_auto_batch_compile_model;
@@ -121,13 +121,13 @@ class AutoBatchAsyncInferRequestTest : public ::testing::TestWithParam<AutoBatchAsyncInferRequestTestParams> {
         auto reshaped = m_model->clone();
         auto inputs = reshaped->inputs();
-        std::map<ov::Output<ov::Node>, ov::PartialShape> partial_shapes;
-        for (auto& input : inputs) {
-            auto input_shape = input.get_shape();
-            if (m_batched_inputs.find(ov::op::util::get_ie_output_name(input)) != m_batched_inputs.end()) {
+        std::map<size_t, ov::PartialShape> partial_shapes;
+        for (size_t input_id = 0; input_id < inputs.size(); input_id++) {
+            auto input_shape = inputs[input_id].get_shape();
+            if (m_batched_inputs.find(input_id) != m_batched_inputs.end()) {
                 input_shape[0] = m_batch_size;
             }
-            partial_shapes.insert({input, ov::PartialShape(input_shape)});
+            partial_shapes.insert({input_id, ov::PartialShape(input_shape)});
         }
         reshaped->reshape(partial_shapes);
@@ -228,15 +228,12 @@ class AutoBatchAsyncInferRequestTest : public ::testing::TestWithParam<AutoBatchAsyncInferRequestTestParams> {
     void prepare_input(std::shared_ptr<ov::Model>& model, int batch_size) {
         const auto& params = model->get_parameters();
-        for (size_t i = 0; i < params.size(); i++) {
-            m_batched_inputs.insert(ov::op::util::get_ie_output_name(params[i]->output(0)));
+        for (size_t input_id = 0; input_id < params.size(); input_id++) {
+            m_batched_inputs.insert(input_id);
         }
         const auto& results = model->get_results();
-        for (size_t i = 0; i < results.size(); i++) {
-            const auto& output = results[i];
-            const auto& node = output->input_value(0);
-            m_batched_outputs.insert(
-                ov::op::util::get_ie_output_name(ov::Output<ov::Node>(node.get_node(), node.get_index())));
+        for (size_t output_id = 0; output_id < results.size(); output_id++) {
+            m_batched_outputs.insert(output_id);
         }
     }
 };
diff --git a/src/plugins/auto_batch/tests/unit/compile_model_create_infer_request_test.cpp b/src/plugins/auto_batch/tests/unit/compile_model_create_infer_request_test.cpp
index 71db54be4fe8ed..8a30bb0e6379f5 100644
--- a/src/plugins/auto_batch/tests/unit/compile_model_create_infer_request_test.cpp
+++ b/src/plugins/auto_batch/tests/unit/compile_model_create_infer_request_test.cpp
@@ -2,10 +2,10 @@
 // SPDX-License-Identifier: Apache-2.0
 //

+#include "common_test_utils/subgraph_builders/multi_single_conv.hpp"
 #include "mock_common.hpp"
 #include "openvino/runtime/threading/immediate_executor.hpp"
 #include "unit_test_utils/mocks/openvino/runtime/mock_icore.hpp"
-#include "common_test_utils/subgraph_builders/multi_single_conv.hpp"

 using CreateInferRequestTestParams = std::tuple<int,   // batch_size
                                                 int>;  // inferReq number
@@ -24,8 +24,8 @@ class CompileModelCreateInferRequestTest : public ::testing::TestWithParam<CreateInferRequestTestParams> {
-    std::set<std::string> m_batched_inputs;
-    std::set<std::string> m_batched_outputs;
+    std::set<std::size_t> m_batched_inputs;
+    std::set<std::size_t> m_batched_outputs;
     ov::SoPtr<ov::IRemoteContext> m_remote_context;
     std::shared_ptr<CompiledModel> m_auto_batch_compile_model;
@@ -77,8 +77,8 @@ class CompileModelCreateInferRequestTest : public ::testing::TestWithParam<CreateInferRequestTestParams> {
                     {ov::auto_batch_timeout(static_cast<std::uint32_t>(200))}};
         m_device_info = {"CPU", {}, m_batch_size};
-        m_batched_inputs = {"Parameter_0"};
-        m_batched_outputs = {"Convolution_20"};
+        m_batched_inputs = {};
+        m_batched_outputs = {};
         if (m_batch_size > 1) {
             m_i_compile_model_with_batch = std::make_shared<NiceMock<MockICompiledModel>>(m_model, m_auto_batch_plugin);
diff --git a/src/plugins/auto_batch/tests/unit/mock_common.hpp b/src/plugins/auto_batch/tests/unit/mock_common.hpp
index 2a9f0230fb43e7..fdb4bd71b0ac3e 100644
--- a/src/plugins/auto_batch/tests/unit/mock_common.hpp
+++ b/src/plugins/auto_batch/tests/unit/mock_common.hpp
@@ -73,8 +73,8 @@ class MockAutoBatchCompileModel : public CompiledModel {
                               const std::shared_ptr<const ov::IPlugin>& plugin,
                               const ov::AnyMap& config,
                               const DeviceInformation& device_info,
-                              const std::set<std::string>& batched_inputs,
-                              const std::set<std::string>& batched_outputs,
+                              const std::set<std::size_t>& batched_inputs,
+                              const std::set<std::size_t>& batched_outputs,
                               const ov::SoPtr<ov::ICompiledModel>& compiled_model_with_batch,
                               const ov::SoPtr<ov::ICompiledModel>& compiled_model_without_batch,
                               const ov::SoPtr<ov::IRemoteContext>& context)
diff --git a/src/plugins/auto_batch/tests/unit/sync_infer_request_test.cpp b/src/plugins/auto_batch/tests/unit/sync_infer_request_test.cpp
index 5b836efd97b0a8..dd3e746778623b 100644
--- a/src/plugins/auto_batch/tests/unit/sync_infer_request_test.cpp
+++ b/src/plugins/auto_batch/tests/unit/sync_infer_request_test.cpp
@@ -29,8 +29,8 @@ class AutoBatchRequestTest : public ::testing::TestWithParam<AutoBatchRequestTestParams> {
-    std::set<std::string> m_batched_inputs;
-    std::set<std::string> m_batched_outputs;
+    std::set<std::size_t> m_batched_inputs;
+    std::set<std::size_t> m_batched_outputs;
     ov::SoPtr<ov::IRemoteContext> m_remote_context;
     std::shared_ptr<CompiledModel> m_auto_batch_compile_model;
@@ -95,8 +95,8 @@ class AutoBatchRequestTest : public ::testing::TestWithParam<AutoBatchRequestTestParams> {
                     {ov::auto_batch_timeout(static_cast<std::uint32_t>(200))}};
         m_device_info = {"CPU", {}, m_batch_size};
-        m_batched_inputs = {"Parameter_0"};
-        m_batched_outputs = {"Convolution_20"};
+        m_batched_inputs = {};
+        m_batched_outputs = {};

         m_i_compile_model_with_batch = std::make_shared<NiceMock<MockICompiledModel>>(m_model, m_auto_batch_plugin);
         m_compile_model_with_batch = {m_i_compile_model_with_batch, {}};
@@ -147,15 +147,12 @@ class AutoBatchRequestTest : public ::testing::TestWithParam<AutoBatchRequestTestParams> {
     void prepare_input(std::shared_ptr<ov::Model>& model, int batch_size) {
         const auto& params = model->get_parameters();
-        for (size_t i = 0; i < params.size(); i++) {
-            m_batched_inputs.insert(ov::op::util::get_ie_output_name(params[i]->output(0)));
+        for (size_t input_id = 0; input_id < params.size(); input_id++) {
+            m_batched_inputs.insert(input_id);
         }
         const auto& results = model->get_results();
-        for (size_t i = 0; i < results.size(); i++) {
-            const auto& output = results[i];
-            const auto& node = output->input_value(0);
-            m_batched_outputs.insert(
-                ov::op::util::get_ie_output_name(ov::Output<ov::Node>(node.get_node(), node.get_index())));
+        for (size_t output_id = 0; output_id < results.size(); output_id++) {
+            m_batched_outputs.insert(output_id);
         }
     }
 };
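Note on the mechanism this patch preserves: `create_shared_tensor_on_batched_tensor` hands each request a zero-copy view into the worker's batched tensor, offset by `batch_id` along dimension 0; the patch only changes the lookup key from a tensor name (via the legacy `ov::op::util::get_ie_output_name` helper, now removed everywhere) to the port index. Below is a minimal standalone sketch of that slicing arithmetic in plain C++ without OpenVINO types; `View` and `slice_batched_buffer` are illustrative names, not part of the OpenVINO API.

```cpp
// Minimal sketch of the zero-copy batch slicing used by the auto-batch plugin.
// Assumption: the batched buffer is laid out with the batch as the 0th dim.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <set>
#include <vector>

struct View {
    std::uint8_t* data;              // aliases the shared batched buffer, no copy
    std::vector<std::size_t> shape;  // per-request shape (batch dim forced to 1)
};

// Return a view over the batch_id-th slice of the buffer. Ports not listed in
// batched_ports are shared whole, mirroring the non-batched branch upstream.
View slice_batched_buffer(std::uint8_t* base,
                          std::vector<std::size_t> shape,  // e.g. {4, 3, 224, 224}
                          std::size_t elem_size,
                          std::size_t port,
                          const std::set<std::size_t>& batched_ports,
                          std::size_t batch_id) {
    std::size_t total = elem_size;
    for (auto d : shape)
        total *= d;
    if (batched_ports.count(port)) {
        const std::size_t per_batch = total / shape[0];  // bytes per batch entry
        shape[0] = 1;                                    // each request sees batch == 1
        return {base + per_batch * batch_id, shape};
    }
    return {base, shape};  // non-batched port: hand out the tensor as-is
}

int main() {
    std::vector<std::uint8_t> batched(4 * 2 * sizeof(float));  // batch of 4, 2 floats each
    const std::set<std::size_t> batched_ports = {0};           // input port 0 is batched
    auto v = slice_batched_buffer(batched.data(), {4, 2}, sizeof(float), 0, batched_ports, 2);
    // v.data aliases batched.data() + 2 * 2 * sizeof(float); no bytes were copied
    std::cout << (v.data - batched.data()) << " bytes into the batched buffer\n";
}
```

Keying on `std::size_t` port indices keeps the slicing logic identical while dropping the string construction and comparison on the hot path of every request.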