zero-copy (assuming deterministic app-level scheduling) for the multi-device, via "borrowing" the corresponding device-specific blobs and letting the app implicitly use these
myshevts committed Nov 23, 2020
1 parent 7e23055 commit 3ffa35c
Showing 4 changed files with 37 additions and 8 deletions.
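
The change only pays off under the assumption called out in the commit message: the application must schedule its requests deterministically, so that each user-facing request keeps landing on the device-specific worker whose blobs it borrowed at creation time. A minimal sketch of that application-side pattern (the model path, request count, and "MULTI:GPU,CPU" device list are placeholders; the classic InferenceEngine API of this release is assumed):

    #include <inference_engine.hpp>
    #include <vector>

    int main() {
        InferenceEngine::Core core;
        auto network = core.ReadNetwork("model.xml");            // placeholder model
        auto exec = core.LoadNetwork(network, "MULTI:GPU,CPU");  // placeholder devices
        // create the request pool once, up front: each user-facing request borrows
        // the blobs of one underlying device-specific request (see the diff below)
        std::vector<InferenceEngine::InferRequest> requests;
        for (size_t i = 0; i < 4; ++i)
            requests.push_back(exec.CreateInferRequest());
        // deterministic app-level scheduling: always run the requests in creation
        // order, so each one reaches the worker whose blobs it borrowed and
        // SetBlobsToAnotherRequest() finds nothing that needs re-setting
        for (size_t iter = 0; iter < 100; ++iter) {
            auto& request = requests[iter % requests.size()];
            request.StartAsync();
            request.Wait(InferenceEngine::IInferRequest::WaitMode::RESULT_READY);
        }
        return 0;
    }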
19 changes: 17 additions & 2 deletions inference-engine/src/multi_device/multi_device_exec_network.cpp
@@ -55,6 +55,7 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap<Infer
const bool needPerfCounters) :
InferenceEngine::ExecutableNetworkThreadSafeDefault(nullptr, std::make_shared<InferenceEngine::ImmediateExecutor>()),
_devicePriorities{networkDevices},
+ _devicePrioritiesInitial{networkDevices},
_networksPerDevice{networksPerDevice},
_config{config},
_needPerfCounters{needPerfCounters} {
@@ -92,7 +93,8 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap<Infer
auto capturedTask = std::move(workerRequestPtr->_task);
capturedTask();
}
- if (!_terminate) {
+ // check the termination status and the work availability before triggering the scheduling logic
+ if (!_terminate && !_inferPipelineTasks.empty()) {
idleGuard.Release()->push(workerRequestPtr);
ScheduleToWorkerInferRequest();
}
@@ -143,7 +145,20 @@ MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {

InferenceEngine::InferRequestInternal::Ptr MultiDeviceExecutableNetwork::CreateInferRequestImpl(InferenceEngine::InputsDataMap networkInputs,
InferenceEngine::OutputsDataMap networkOutputs) {
- return std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs);
+ auto num = _numRequestsCreated++;
+ size_t sum = 0;
+ InferenceEngine::InferRequest request_to_share_blobs_with;
+ // borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
+ // this potentially saves on the data-copy later (if the requests are scheduled in the same order)
+ for (const auto& device : _devicePrioritiesInitial) {
+ auto& dev_requests = _workerRequests[device.deviceName];
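+ // worker requests form per-device pools, concatenated in the initial priority
+ // order: user-facing request #num borrows from the worker at offset (num - sum)
+ // of the first pool it falls into. E.g. with 2 GPU workers ahead of 4 CPU
+ // workers, requests #0-1 borrow from GPU, #2-5 from CPU, and any later request
+ // shares nothing (request_to_share_blobs_with stays empty, blobs get allocated)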
+ if ((num - sum) < dev_requests.size()) {
+ request_to_share_blobs_with = dev_requests.at(num - sum)._inferRequest;
+ break;
+ }
+ sum += dev_requests.size();
+ }
+ return std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs, request_to_share_blobs_with);
}

IInferRequest::Ptr MultiDeviceExecutableNetwork::CreateInferRequest() {
2 changes: 2 additions & 0 deletions inference-engine/src/multi_device/multi_device_exec_network.hpp
@@ -99,12 +99,14 @@ class MultiDeviceExecutableNetwork : public InferenceEngine::ExecutableNetworkTh
std::atomic_bool _terminate = {false};
std::mutex _mutex;
std::vector<DeviceInformation> _devicePriorities;
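+ // snapshot of the device priorities taken at load time; kept const so that the
+ // request-to-worker mapping in CreateInferRequestImpl stays stable even if
+ // _devicePriorities above changes at runtime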
+ const std::vector<DeviceInformation> _devicePrioritiesInitial;
DeviceMap<InferenceEngine::ExecutableNetwork> _networksPerDevice;
ThreadSafeQueue<InferenceEngine::Task> _inferPipelineTasks;
DeviceMap<NotBusyWorkerRequests> _idleWorkerRequests;
DeviceMap<std::vector<WorkerInferRequest>> _workerRequests;
std::unordered_map<std::string, InferenceEngine::Parameter> _config;
bool _needPerfCounters = false;
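+ // running count of the user-facing requests ever created, used to pair each
+ // new request with a worker request in CreateInferRequestImpl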
+ std::atomic_size_t _numRequestsCreated = {0};
};

} // namespace MultiDevicePlugin
19 changes: 15 additions & 4 deletions inference-engine/src/multi_device/multi_device_infer_request.cpp
@@ -10,8 +10,17 @@ namespace MultiDevicePlugin {
using namespace InferenceEngine;
// ------------------------------MultiDeviceInferRequest----------------------------
MultiDeviceInferRequest::MultiDeviceInferRequest(const InputsDataMap& networkInputs,
- const OutputsDataMap& networkOutputs)
+ const OutputsDataMap& networkOutputs,
+ InferRequest request_to_share_blobs_with)
: InferRequestInternal(networkInputs, networkOutputs) {
+ if (request_to_share_blobs_with) {
+ // borrow device-friendly blobs from the request
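+ // (zero-copy only materializes if the app later schedules this request onto
+ // the very worker it borrowed from; otherwise SetBlobsToAnotherRequest
+ // re-points the worker's blobs as before)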
+ for (const auto &it : _networkInputs)
+ _inputs[it.first] = request_to_share_blobs_with.GetBlob(it.first);
+ for (const auto &it : _networkOutputs)
+ _outputs[it.first] = request_to_share_blobs_with.GetBlob(it.first);
+ return;
+ }
// Allocate all input blobs
for (const auto &it : networkInputs) {
Layout l = it.second->getLayout();
@@ -40,14 +49,16 @@ void MultiDeviceInferRequest::SetBlobsToAnotherRequest(InferRequest& req) {
auto &name = it.first;
// this request is already in BUSY state, so it is safe to use the internal functions
GetBlob(name.c_str(), blob);
- req.SetBlob(name.c_str(), blob);
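+ // skip SetBlob when the worker already holds this very blob, i.e. the request
+ // borrowed it at creation time: this preserves the zero-copy path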
+ if (req.GetBlob(name) != blob)
+ req.SetBlob(name, blob);
}
for (const auto &it : _networkOutputs) {
Blob::Ptr blob;
- auto &name = it.first;
+ auto& name = it.first;
// this request is already in BUSY state, so it is safe to use the internal functions
GetBlob(name.c_str(), blob);
- req.SetBlob(name.c_str(), blob);
+ if (req.GetBlob(name) != blob)
+ req.SetBlob(name, blob);
}
}

5 changes: 3 additions & 2 deletions inference-engine/src/multi_device/multi_device_infer_request.hpp
@@ -23,14 +23,15 @@ class MultiDeviceInferRequest : public InferenceEngine::InferRequestInternal {
public:
using Ptr = std::shared_ptr<MultiDeviceInferRequest>;
explicit MultiDeviceInferRequest(const InferenceEngine::InputsDataMap& networkInputs,
- const InferenceEngine::OutputsDataMap& networkOutputs);
+ const InferenceEngine::OutputsDataMap& networkOutputs,
+ InferenceEngine::InferRequest request_to_share_blobs_with);
void GetPerformanceCounts(std::map<std::string, InferenceEngine::InferenceEngineProfileInfo>&) const override {
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
}
void InferImpl() override {
THROW_IE_EXCEPTION << NOT_IMPLEMENTED_str;
}
- // Multi-Device impl specific: sets the data (blobs from the device-less requets to the specific device request)
+ // Multi-Device impl specific: sets the data (blobs from the device-less requests to the specific device request)
void SetBlobsToAnotherRequest(InferenceEngine::InferRequest& req);
};

