diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp index 399e20cc42df59..e24bf37cb3e02c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/async_infer_request.hpp @@ -7,6 +7,7 @@ #include #include #include +#include "intel_gpu/plugin/infer_request_legacy.hpp" #include "intel_gpu/plugin/infer_request.hpp" namespace ov { @@ -16,10 +17,11 @@ namespace intel_gpu { class AsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDefault { public: using Parent = InferenceEngine::AsyncInferRequestThreadSafeDefault; - AsyncInferRequest(const InferRequest::Ptr &inferRequest, + AsyncInferRequest(const IInferRequestInternal::Ptr &inferRequest, const InferenceEngine::ITaskExecutor::Ptr& taskExecutor, const InferenceEngine::ITaskExecutor::Ptr& waitExecutor, - const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor); + const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor, + const bool isLegacy); ~AsyncInferRequest(); @@ -27,8 +29,9 @@ class AsyncInferRequest : public InferenceEngine::AsyncInferRequestThreadSafeDef void StartAsync_ThreadUnsafe() override; private: - InferRequest::Ptr _inferRequest; + IInferRequestInternal::Ptr _inferRequest; InferenceEngine::ITaskExecutor::Ptr _waitExecutor; + bool _isLegacy; }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/async_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/async_infer_request.cpp index e275355ea5c6e0..ac8186e1e296c9 100644 --- a/src/plugins/intel_gpu/src/plugin/async_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/async_infer_request.cpp @@ -10,45 +10,84 @@ namespace ov { namespace runtime { namespace intel_gpu { -AsyncInferRequest::AsyncInferRequest(const InferRequest::Ptr &inferRequest, +AsyncInferRequest::AsyncInferRequest(const IInferRequestInternal::Ptr &inferRequest, const InferenceEngine::ITaskExecutor::Ptr& taskExecutor, const InferenceEngine::ITaskExecutor::Ptr& waitExecutor, - const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor) - : AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor), _inferRequest(inferRequest), _waitExecutor(waitExecutor) { + const InferenceEngine::ITaskExecutor::Ptr& callbackExecutor, + const bool isLegacy) + : AsyncInferRequestThreadSafeDefault(inferRequest, taskExecutor, callbackExecutor), + _inferRequest(inferRequest), + _waitExecutor(waitExecutor), + _isLegacy(isLegacy) { _pipeline = {}; - if (!_inferRequest->use_external_queue()) { - _pipeline.push_back({taskExecutor, - [this] { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::PreprocessingAndStartPipeline"); - _inferRequest->setup_stream_graph(); - _inferRequest->preprocess(); - _inferRequest->enqueue(); - _inferRequest->wait(); - } }); + if (_isLegacy) { + if (!std::static_pointer_cast(_inferRequest)->use_external_queue()) { + _pipeline.push_back({taskExecutor, + [this] { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::PreprocessingAndStartPipeline"); + std::static_pointer_cast(_inferRequest)->setup_stream_graph(); + std::static_pointer_cast(_inferRequest)->preprocess(); + std::static_pointer_cast(_inferRequest)->enqueue(); + std::static_pointer_cast(_inferRequest)->wait(); + } }); + } else { + _pipeline.push_back({ _waitExecutor, + [this] { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::WaitPipeline"); + std::static_pointer_cast(_inferRequest)->wait_notify(); + } }); + } } else { - _pipeline.push_back({ _waitExecutor, + if (!std::static_pointer_cast(_inferRequest)->use_external_queue()) { + _pipeline.push_back({taskExecutor, [this] { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::WaitPipeline"); - _inferRequest->wait_notify(); - } }); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::PreprocessingAndStartPipeline"); + std::static_pointer_cast(_inferRequest)->setup_stream_graph(); + std::static_pointer_cast(_inferRequest)->preprocess(); + std::static_pointer_cast(_inferRequest)->enqueue(); + std::static_pointer_cast(_inferRequest)->wait(); + } }); + } else { + _pipeline.push_back({ _waitExecutor, + [this] { + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "AsyncInferRequest::WaitPipeline"); + std::static_pointer_cast(_inferRequest)->wait_notify(); + } }); + } } } void AsyncInferRequest::Infer_ThreadUnsafe() { - if (_inferRequest->use_external_queue()) { - _inferRequest->setup_stream_graph(); - _inferRequest->preprocess_notify(); - _inferRequest->enqueue_notify(); + if (_isLegacy) { + if (std::static_pointer_cast(_inferRequest)->use_external_queue()) { + std::static_pointer_cast(_inferRequest)->setup_stream_graph(); + std::static_pointer_cast(_inferRequest)->preprocess_notify(); + std::static_pointer_cast(_inferRequest)->enqueue_notify(); + } + } else { + if (std::static_pointer_cast(_inferRequest)->use_external_queue()) { + std::static_pointer_cast(_inferRequest)->setup_stream_graph(); + std::static_pointer_cast(_inferRequest)->preprocess_notify(); + std::static_pointer_cast(_inferRequest)->enqueue_notify(); + } } Parent::Infer_ThreadUnsafe(); } void AsyncInferRequest::StartAsync_ThreadUnsafe() { - if (_inferRequest->use_external_queue()) { - _inferRequest->setup_stream_graph(); - _inferRequest->preprocess_notify(); - _inferRequest->enqueue_notify(); + if (_isLegacy) { + if (std::static_pointer_cast(_inferRequest)->use_external_queue()) { + std::static_pointer_cast(_inferRequest)->setup_stream_graph(); + std::static_pointer_cast(_inferRequest)->preprocess_notify(); + std::static_pointer_cast(_inferRequest)->enqueue_notify(); + } + } else { + if (std::static_pointer_cast(_inferRequest)->use_external_queue()) { + std::static_pointer_cast(_inferRequest)->setup_stream_graph(); + std::static_pointer_cast(_inferRequest)->preprocess_notify(); + std::static_pointer_cast(_inferRequest)->enqueue_notify(); + } } Parent::StartAsync_ThreadUnsafe(); } diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 75daabd1de1944..cc1a0147b58121 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -119,18 +119,22 @@ IInferRequestInternal::Ptr CompiledModel::CreateInferRequest() { } } + bool is_legacy = false; if (this->_plugin) { const auto& core = _plugin->GetCore(); if (core && core->isNewAPI()) internalRequest = CreateInferRequestImpl(_parameters, _results); } - if (!internalRequest) + if (!internalRequest) { internalRequest = CreateInferRequestImpl(_networkInputs, _networkOutputs); + is_legacy = true; + } internalRequest->setPointerToExecutableNetworkInternal(shared_from_this()); - return std::make_shared(std::static_pointer_cast(internalRequest), + return std::make_shared(internalRequest, m_taskExecutor, m_waitExecutor, - _callbackExecutor); + _callbackExecutor, + is_legacy); } std::shared_ptr CompiledModel::GetExecGraphInfo() {