From 3ffa35c03e31eece531dee913d7ac8dd92dc2cf2 Mon Sep 17 00:00:00 2001 From: myshevts Mon, 23 Nov 2020 15:05:54 +0300 Subject: [PATCH] zero-copy (assuming deterministic app-level scheduling) for the multi-device, via "borrowing" the corresponding device-specific blobs and letting the app implicitly use these --- .../multi_device_exec_network.cpp | 19 +++++++++++++++++-- .../multi_device_exec_network.hpp | 2 ++ .../multi_device_infer_request.cpp | 19 +++++++++++++++---- .../multi_device_infer_request.hpp | 5 +++-- 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/inference-engine/src/multi_device/multi_device_exec_network.cpp b/inference-engine/src/multi_device/multi_device_exec_network.cpp index 1f225232fb0228..271b7ebec78cb2 100644 --- a/inference-engine/src/multi_device/multi_device_exec_network.cpp +++ b/inference-engine/src/multi_device/multi_device_exec_network.cpp @@ -55,6 +55,7 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap()), _devicePriorities{networkDevices}, + _devicePrioritiesInitial{networkDevices}, _networksPerDevice{networksPerDevice}, _config{config}, _needPerfCounters{needPerfCounters} { @@ -92,7 +93,8 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap_task); capturedTask(); } - if (!_terminate) { + // check for termination status and the work availability before triggering the scheduling logic + if (!_terminate && !_inferPipelineTasks.empty()) { idleGuard.Release()->push(workerRequestPtr); ScheduleToWorkerInferRequest(); } @@ -143,7 +145,20 @@ MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() { InferenceEngine::InferRequestInternal::Ptr MultiDeviceExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs, InferenceEngine::OutputsDataMap networkOutputs) { - return std::make_shared(networkInputs, networkOutputs); + auto num = _numRequestsCreated++; + size_t sum = 0; + InferenceEngine::InferRequest request_to_share_blobs_with; + 
// borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests + // this allows to potentially save on the data-copy later (if the requests are scheduled in the same order) + for (const auto& device : _devicePrioritiesInitial) { + auto& dev_requests = _workerRequests[device.deviceName]; + if ((num - sum) < dev_requests.size()) { + request_to_share_blobs_with = dev_requests.at(num - sum)._inferRequest; + break; + } + sum += dev_requests.size(); + } + return std::make_shared(networkInputs, networkOutputs, request_to_share_blobs_with); } IInferRequest::Ptr MultiDeviceExecutableNetwork::CreateInferRequest() { diff --git a/inference-engine/src/multi_device/multi_device_exec_network.hpp b/inference-engine/src/multi_device/multi_device_exec_network.hpp index 6422ba2811fe2c..37e4f342cc9219 100644 --- a/inference-engine/src/multi_device/multi_device_exec_network.hpp +++ b/inference-engine/src/multi_device/multi_device_exec_network.hpp @@ -99,12 +99,14 @@ class MultiDeviceExecutableNetwork : public InferenceEngine::ExecutableNetworkTh std::atomic_bool _terminate = {false}; std::mutex _mutex; std::vector _devicePriorities; + const std::vector _devicePrioritiesInitial; DeviceMap _networksPerDevice; ThreadSafeQueue _inferPipelineTasks; DeviceMap _idleWorkerRequests; DeviceMap> _workerRequests; std::unordered_map _config; bool _needPerfCounters = false; + std::atomic_size_t _numRequestsCreated = {0}; }; } // namespace MultiDevicePlugin diff --git a/inference-engine/src/multi_device/multi_device_infer_request.cpp b/inference-engine/src/multi_device/multi_device_infer_request.cpp index d021e0a30624f0..0a363731e95381 100644 --- a/inference-engine/src/multi_device/multi_device_infer_request.cpp +++ b/inference-engine/src/multi_device/multi_device_infer_request.cpp @@ -10,8 +10,17 @@ namespace MultiDevicePlugin { using namespace InferenceEngine; // ------------------------------MultiDeviceInferRequest---------------------------- 
MultiDeviceInferRequest::MultiDeviceInferRequest(const InputsDataMap& networkInputs, - const OutputsDataMap& networkOutputs) + const OutputsDataMap& networkOutputs, + InferRequest request_to_share_blobs_with) : InferRequestInternal(networkInputs, networkOutputs) { + if (request_to_share_blobs_with) { + // borrow device-friendly blobs from the request + for (const auto &it : _networkInputs) + _inputs[it.first] = request_to_share_blobs_with.GetBlob(it.first); + for (const auto &it : _networkOutputs) + _outputs[it.first] = request_to_share_blobs_with.GetBlob(it.first); + return; + } // Allocate all input blobs for (const auto &it : networkInputs) { Layout l = it.second->getLayout(); @@ -40,14 +49,16 @@ void MultiDeviceInferRequest::SetBlobsToAnotherRequest(InferRequest& req) { auto &name = it.first; // this request is already in BUSY state, so using the internal functions safely GetBlob(name.c_str(), blob); - req.SetBlob(name.c_str(), blob); + if (req.GetBlob(name) != blob) + req.SetBlob(name, blob); } for (const auto &it : _networkOutputs) { Blob::Ptr blob; - auto &name = it.first; + auto& name = it.first; // this request is already in BUSY state, so using the internal functions safely GetBlob(name.c_str(), blob); - req.SetBlob(name.c_str(), blob); + if (req.GetBlob(name) != blob) + req.SetBlob(name, blob); } } diff --git a/inference-engine/src/multi_device/multi_device_infer_request.hpp b/inference-engine/src/multi_device/multi_device_infer_request.hpp index aebeb6784f6106..80270cd117c294 100644 --- a/inference-engine/src/multi_device/multi_device_infer_request.hpp +++ b/inference-engine/src/multi_device/multi_device_infer_request.hpp @@ -23,14 +23,15 @@ class MultiDeviceInferRequest : public InferenceEngine::InferRequestInternal { public: using Ptr = std::shared_ptr; explicit MultiDeviceInferRequest(const InferenceEngine::InputsDataMap& networkInputs, - const InferenceEngine::OutputsDataMap& networkOutputs); + const InferenceEngine::OutputsDataMap& networkOutputs, 
+ InferenceEngine::InferRequest request_to_share_blobs_with); void GetPerformanceCounts(std::map&) const override { THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str; } void InferImpl() override { THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str; } - // Multi-Device impl specific: sets the data (blobs from the device-less requets to the specific device request) + // Multi-Device impl specific: sets the data (blobs from the device-less requests to the specific device request) void SetBlobsToAnotherRequest(InferenceEngine::InferRequest& req); };