From a83fcdbf97f96171c21adf31e9fd2d3c1d50a38a Mon Sep 17 00:00:00 2001
From: "Hu, Yuan2"
Date: Mon, 6 Sep 2021 14:31:04 +0800
Subject: [PATCH] Enable CPU to accelerate first-inference latency (FIL) in MULTI

Signed-off-by: Hu, Yuan2
---
 .../src/inference_engine/src/ie_core.cpp      |   2 +
 .../multi_device_exec_network.cpp             | 357 +++++++++++++++---
 .../multi_device_exec_network.hpp             |  43 ++-
 .../src/multi_device/multi_device_plugin.cpp  |  67 ++--
 .../src/multi_device/multi_device_plugin.hpp  |   5 +-
 .../behavior/config.cpp                       |   7 -
 .../behavior/infer_request/config.cpp         |  13 -
 .../behavior/test_plugin.cpp                  |   8 +-
 .../behavior/config.cpp                       |   9 +-
 .../behavior/infer_request/callback.cpp       |  13 +-
 .../behavior/infer_request/wait.cpp           |  10 +-
 .../behavior/preprocessing/set_preprocess.cpp |  10 +-
 .../behavior/test_plugin.cpp                  |  12 +-
 .../behavior/version.cpp                      |   8 +-
 14 files changed, 448 insertions(+), 116 deletions(-)

diff --git a/inference-engine/src/inference_engine/src/ie_core.cpp b/inference-engine/src/inference_engine/src/ie_core.cpp
index a568d2f62175f9..e36ad9ff8ba639 100644
--- a/inference-engine/src/inference_engine/src/ie_core.cpp
+++ b/inference-engine/src/inference_engine/src/ie_core.cpp
@@ -68,10 +68,12 @@ Parsed parseDeviceNameIntoConfig(const std::string& deviceName, const std::ma
         if (deviceName.find("AUTO:") == 0) {
             config_[InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] =
                 deviceName.substr(std::string("AUTO:").size());
+            config_.insert({CONFIG_KEY_INTERNAL(MULTI_WORK_MODE_AS_AUTO), InferenceEngine::PluginConfigParams::YES});
         }
     } else {
         if (deviceName_.empty()) {
             deviceName_ = "AUTO";
+            config_.insert({CONFIG_KEY_INTERNAL(MULTI_WORK_MODE_AS_AUTO), InferenceEngine::PluginConfigParams::YES});
         }
         InferenceEngine::DeviceIDParser parser(deviceName_);
         deviceName_ = parser.getDeviceName();
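For orientation, the AUTO work mode wired up above is reached through the ordinary Core entry points; a minimal usage sketch follows (the model path is a placeholder, not part of the patch):

#include <ie_core.hpp>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder IR path

    // "AUTO" (or "AUTO:GPU,CPU") is rewritten by parseDeviceNameIntoConfig()
    // into the MULTI plugin with MULTI_WORK_MODE_AS_AUTO set, as shown above.
    auto execNetwork = core.LoadNetwork(network, "AUTO");

    auto request = execNetwork.CreateInferRequest();
    request.Infer();  // early inferences may run on CPU while the accelerator network loads
    return 0;
}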
diff --git a/inference-engine/src/multi_device/multi_device_exec_network.cpp b/inference-engine/src/multi_device/multi_device_exec_network.cpp
index 63d88bde9675ad..034dfac534faa8 100644
--- a/inference-engine/src/multi_device/multi_device_exec_network.cpp
+++ b/inference-engine/src/multi_device/multi_device_exec_network.cpp
@@ -11,16 +11,46 @@
 #include
 #include
-
+#include "ie_icore.hpp"
 #include "ie_metric_helpers.hpp"
 #include
 #include "multi_device_exec_network.hpp"
 #include "multi_device_async_infer_request.hpp"
 #include "multi_device_plugin.hpp"
+#include "ngraph/opsets/opset1.hpp"
+#include "ngraph_ops/convolution_ie.hpp"
+#include "ngraph_ops/deconvolution_ie.hpp"
+#include "transformations/utils/utils.hpp"
+
 // ------------------------------MultiDeviceExecutableNetwork----------------------------
 namespace MultiDevicePlugin {
-    using namespace InferenceEngine;
+using namespace InferenceEngine;
+
+namespace {
+std::string GetNetworkPrecision(const InferenceEngine::CNNNetwork &network) {
+    auto nGraphFunc = network.getFunction();
+    bool isINTModel = ngraph::op::util::has_op_with_type<ngraph::op::FakeQuantize>(nGraphFunc);
+    if (isINTModel) {
+        return METRIC_VALUE(INT8);
+    }
+    for (auto & node : nGraphFunc->get_ordered_ops()) {
+        if (std::dynamic_pointer_cast<ngraph::opset1::Convolution>(node) ||
+            std::dynamic_pointer_cast<ngraph::opset1::GroupConvolution>(node) ||
+            std::dynamic_pointer_cast<ngraph::opset1::GroupConvolutionBackpropData>(node) ||
+            std::dynamic_pointer_cast<ngraph::opset1::ConvolutionBackpropData>(node) ||
+            std::dynamic_pointer_cast<ngraph::op::ConvolutionIE>(node) ||
+            std::dynamic_pointer_cast<ngraph::op::DeconvolutionIE>(node)) {
+            auto layerType = node->input(1).get_element_type().get_type_name();
+            if (layerType == "f32")
+                return METRIC_VALUE(FP32);
+            if (layerType == "f16")
+                return METRIC_VALUE(FP16);
+        }
+    }
+    return METRIC_VALUE(FP32);
+}
+}  // namespace
 
 thread_local MultiDeviceExecutableNetwork::WorkerInferRequest* MultiDeviceExecutableNetwork::_thisWorkerInferRequest = nullptr;
 // TODO: revert to the plain variable (see header file), when we moved to the next CentOS 8.x in our support matrix
@@ -60,54 +90,235 @@ MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const DeviceMap
+void MultiDeviceExecutableNetwork::GenerateWorkers(const std::string& device, const SoExecutableNetworkInternal& executableNetwork) {
+    auto itNumRequests = std::find_if(_devicePriorities.cbegin(), _devicePriorities.cend(),
+                                      [&device](const DeviceInformation& d){ return d.deviceName == device;});
+    unsigned int optimalNum = 0;
+    try {
+        optimalNum = executableNetwork->GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
+    } catch (const InferenceEngine::Exception &iie) {
+        IE_THROW()
+            << "Every device used with the Multi-Device should "
+            << "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
+            << "Failed to query the metric for the " << device << " with error:" << iie.what();
+    }
+    const auto numRequests = (_devicePriorities.end() == itNumRequests ||
+                              itNumRequests->numRequestsPerDevices == -1) ? optimalNum : itNumRequests->numRequestsPerDevices;
+    auto& workerRequests = _workerRequests[device];
+    auto& idleWorkerRequests = _idleWorkerRequests[device];
+    workerRequests.resize(numRequests);
+    _inferPipelineTasksDeviceSpecific[device] = std::unique_ptr<ThreadSafeQueue<Task>>(new ThreadSafeQueue<Task>);
+    auto* idleWorkerRequestsPtr = &(idleWorkerRequests);
+    idleWorkerRequests.set_capacity(numRequests);
+    for (auto&& workerRequest : workerRequests) {
+        workerRequest._inferRequest = { executableNetwork, executableNetwork->CreateInferRequest() };
+        auto* workerRequestPtr = &workerRequest;
+        IE_ASSERT(idleWorkerRequests.try_push(workerRequestPtr) == true);
+        workerRequest._inferRequest->SetCallback(
+            [workerRequestPtr, this, device, idleWorkerRequestsPtr] (std::exception_ptr exceptionPtr) mutable {
+                IdleGuard idleGuard{workerRequestPtr, *idleWorkerRequestsPtr};
+                workerRequestPtr->_exceptionPtr = exceptionPtr;
+                {
+                    auto capturedTask = std::move(workerRequestPtr->_task);
+                    capturedTask();
+                }
+                // try to return the request to the idle list (fails if the overall object destruction has begun)
+                if (idleGuard.Release()->try_push(workerRequestPtr)) {
+                    // let's try to pop a task, as we know there is at least one idle request, schedule if succeeded
+                    // if no device-agnostic tasks, let's try pop the device specific task, schedule if succeeded
+                    Task t;
+                    if (_inferPipelineTasks.try_pop(t))
+                        ScheduleToWorkerInferRequest(std::move(t));
+                    else if (_inferPipelineTasksDeviceSpecific[device]->try_pop(t))
+                        ScheduleToWorkerInferRequest(std::move(t), device);
+                }
+            });
+    }
+}
+
+MultiDeviceExecutableNetwork::MultiDeviceExecutableNetwork(const std::string& modelPath,
+                                                           const InferenceEngine::CNNNetwork& network,
+                                                           const std::map<std::string, std::string>& config,
+                                                           MultiDeviceInferencePlugin* plugin)
+    : _multiPlugin(plugin)
+    , _workModeIsAUTO(true) {
+    if (_multiPlugin->GetCore() == nullptr) {
+        IE_THROW() << "Please, work with MULTI device via InferenceEngine::Core object";
+    }
+
+    if (modelPath.empty() && network.getFunction() == nullptr) {
+        IE_THROW() << "MULTI device supports only ngraph network representation";
+    }
+
+    core = _multiPlugin->GetCore();  // shared_ptr that holds the Core
+    auto strDevices = _multiPlugin->GetDeviceList(config);
+    // collect the settings that are applicable to the devices we are loading the network to
+    _config[MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES] = strDevices;
+
+    auto metaDevices = _multiPlugin->ParseMetaDevices(strDevices, config);
+    _devicePrioritiesInitial = metaDevices;
+    _devicePriorities = metaDevices;
+
+    std::vector<DeviceInformation> needLoadDevices;
+
+    // check whether the device list contains a CPU device
+    const auto CPUIter = std::find_if(metaDevices.begin(), metaDevices.end(),
+                                      [=](const DeviceInformation& d)->bool{return d.deviceName.find("CPU") != std::string::npos;});
+    if (CPUIter != metaDevices.end()) {
+        _cpuDevice = *CPUIter;
+        _config.insert(_cpuDevice.config.begin(), _cpuDevice.config.end());
+        needLoadDevices.push_back(_cpuDevice);
+        _cpuFuture = _cpuPromise.get_future();
+    }
+
+    // get accelerator device, like GPU
+    auto networkPrecision = GetNetworkPrecision(network);
+    _acceleratorDevice = _multiPlugin->SelectDevice(metaDevices, networkPrecision);
+    bool isAccelerator =
+        _acceleratorDevice.deviceName.find("CPU") == std::string::npos;
+    if (isAccelerator) {
+        _config.insert(_acceleratorDevice.config.begin(), _acceleratorDevice.config.end());
+        needLoadDevices.push_back(_acceleratorDevice);
+        _acceleratorFuture = _acceleratorPromise.get_future();
+    }
+
+    if (needLoadDevices.size() == 0) {
+        IE_THROW() << "No device set";
+    }
+
+    std::vector<Task> loads;
+    for (auto& p : needLoadDevices) {
+        // initialize these containers first, to avoid insert operations from the worker threads
+        _idleWorkerRequests[p.deviceName];
+        _workerRequests[p.deviceName];
+        _inferPipelineTasksDeviceSpecific[p.deviceName] = NULL;
+        const auto device = p.deviceName;
+        // we will not wait for the accelerator network load task,
+        // so some parameters need to be captured by value
+        loads.push_back([&, modelPath, network, device]() {
+            SoExecutableNetworkInternal executableNetwork;
+            if (!modelPath.empty()) {
+                executableNetwork = core->LoadNetwork(modelPath, device, {});
+            } else {
+                executableNetwork = core->LoadNetwork(network, device, {});
+            }
+
+            GenerateWorkers(device, executableNetwork);
+
+            if (device.find("CPU") == std::string::npos) {
+                _alreadyActualNetwork = true;
+                _acceleratorPromise.set_value(executableNetwork);
+            } else {
+                _cpuPromise.set_value(executableNetwork);
+            }
+        });
+    }
+
+    // we do not wait for the accelerator network load task to finish,
+    // so the executor must not be destroyed before that task completes;
+    // therefore keep the executor as a member of MultiDeviceExecutableNetwork
+    _executor = InferenceEngine::ExecutorManager::getInstance()->getIdleCPUStreamsExecutor(
+                    IStreamsExecutor::Config{"AutoDeviceAsyncLoad",
+                                             static_cast<int>(std::thread::hardware_concurrency()) /* max possible #streams*/,
+                                             1 /*single thread per stream*/,
+                                             IStreamsExecutor::ThreadBindingType::NONE});
+
+    for (auto& task : loads) {
+        _executor->run(task);
+    }
+
+    WaitFirstNetworkReady();
+}
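The constructor above is the heart of the feature: both loads start in parallel and it returns as soon as the first network (normally CPU) is usable. A reduced sketch of the same promise/future pattern in plain C++ (the Network/LoadFor names are illustrative, not the plugin's API):

#include <future>
#include <iostream>
#include <string>

struct Network { std::string device; };

// stands in for core->LoadNetwork(model, device, {}); assume CPU compiles fastest
static Network LoadFor(const std::string& device) {
    return Network{device};
}

int main() {
    std::promise<Network> cpuPromise, acceleratorPromise;
    auto cpuFuture = cpuPromise.get_future();
    auto acceleratorFuture = acceleratorPromise.get_future();

    // both loads start in the background, mirroring _executor->run(task)
    auto cpuLoad = std::async(std::launch::async,
                              [&] { cpuPromise.set_value(LoadFor("CPU")); });
    auto accLoad = std::async(std::launch::async,
                              [&] { acceleratorPromise.set_value(LoadFor("GPU")); });

    Network first = cpuFuture.get();           // WaitFirstNetworkReady(): serve inferences now
    std::cout << "serving on " << first.device << "\n";

    Network actual = acceleratorFuture.get();  // WaitActualNetworkReady(): hot-swap later
    std::cout << "switching to " << actual.device << "\n";
    return 0;
}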
 
-    auto itNumRequests = std::find_if(_devicePriorities.cbegin(), _devicePriorities.cend(),
-                                      [&device](const DeviceInformation& d){ return d.deviceName == device;});
-    unsigned int optimalNum = 0;
+void MultiDeviceExecutableNetwork::WaitFirstNetworkReady() {
+    if (IsActualNetworkReady()) {
+        return;
+    }
+    if (_cpuFuture.valid() && _acceleratorFuture.valid()) {
         try {
-            optimalNum = network->GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
-        } catch (const InferenceEngine::Exception &iie) {
-            IE_THROW()
-                << "Every device used with the Multi-Device should "
-                << "support OPTIMAL_NUMBER_OF_INFER_REQUESTS ExecutableNetwork metric. "
-                << "Failed to query the metric for the " << device << " with error:" << iie.what();
-        }
-        const auto numRequests = (_devicePriorities.end() == itNumRequests ||
-                                  itNumRequests->numRequestsPerDevices == -1) ? optimalNum : itNumRequests->numRequestsPerDevices;
-        auto& workerRequests = _workerRequests[device];
-        auto& idleWorkerRequests = _idleWorkerRequests[device];
-        workerRequests.resize(numRequests);
-        _inferPipelineTasksDeviceSpecific[device] = std::unique_ptr<ThreadSafeQueue<Task>>(new ThreadSafeQueue<Task>);
-        auto* idleWorkerRequestsPtr = &(idleWorkerRequests);
-        idleWorkerRequests.set_capacity(numRequests);
-        for (auto&& workerRequest : workerRequests) {
-            workerRequest._inferRequest = { network, network->CreateInferRequest() };
-            auto* workerRequestPtr = &workerRequest;
-            IE_ASSERT(idleWorkerRequests.try_push(workerRequestPtr) == true);
-            workerRequest._inferRequest->SetCallback(
-                [workerRequestPtr, this, device, idleWorkerRequestsPtr] (std::exception_ptr exceptionPtr) mutable {
-                    IdleGuard idleGuard{workerRequestPtr, *idleWorkerRequestsPtr};
-                    workerRequestPtr->_exceptionPtr = exceptionPtr;
-                    {
-                        auto capturedTask = std::move(workerRequestPtr->_task);
-                        capturedTask();
-                    }
-                    // try to return the request to the idle list (fails if the overall object destruction has began)
-                    if (idleGuard.Release()->try_push(workerRequestPtr)) {
-                        // let's try to pop a task, as we know there is at least one idle request, schedule if succeeded
-                        // if no device-agnostic tasks, let's try pop the device specific task, schedule if succeeded
-                        Task t;
-                        if (_inferPipelineTasks.try_pop(t))
-                            ScheduleToWorkerInferRequest(std::move(t));
-                        else if (_inferPipelineTasksDeviceSpecific[device]->try_pop(t))
-                            ScheduleToWorkerInferRequest(std::move(t), device);
-                    }
-                });
+            _networkFirstReady = _cpuFuture.get();
+        } catch (const std::exception& e) {
+            printf("Warning: load network to CPU failed: %s\n", e.what());
+            _networkActualNeeded = _acceleratorFuture.get();
         }
+    } else if (_acceleratorFuture.valid()) {  // only accelerator is valid, like AUTO:GPU
+        _networkActualNeeded = _acceleratorFuture.get();
+    } else if (_cpuFuture.valid()) {  // only CPU is valid, like AUTO:CPU
+        _networkActualNeeded = _cpuFuture.get();
+    } else {
+        IE_THROW() << "No device task available";
+    }
+
+    // if there is only one device, or the CPU device failed, the actual network is already in place
+    if (!_acceleratorFuture.valid()) {
+        _alreadyActualNetwork = true;
+        // set the perf-counters flag
+        SetPerfCounts();
+    }
+}
+
+bool MultiDeviceExecutableNetwork::IsActualNetworkReady() const {
+    if (_alreadyActualNetwork) {
+        WaitActualNetworkReady();
+        return true;
+    } else {
+        return false;
+    }
+}
+
+void MultiDeviceExecutableNetwork::WaitActualNetworkReady() const {
+    // Different APIs may call this function, so guard it with call_once,
+    // once per MultiDeviceExecutableNetwork instance
+    std::call_once(_oc, [&] () {
+        if (_acceleratorFuture.valid()) {
+            _networkActualNeeded = _acceleratorFuture.get();
+            SetPerfCounts();
+        }
+    });
+}
+
+void MultiDeviceExecutableNetwork::SetPerfCounts() const {
+    try {
+        _needPerfCounters = _networkActualNeeded->GetMetric(PluginConfigParams::KEY_PERF_COUNT).as<std::string>() == PluginConfigParams::YES;
+    } catch (...) {
+    }
+}
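WaitActualNetworkReady() relies on std::call_once because std::future::get() may only be called once, while several API paths (inference scheduling, GetContext, GetMetric, the destructor) can all request the actual network. A minimal, self-contained illustration of that guard (names mirror the patch but are simplified globals):

#include <future>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

std::once_flag onceFlag;               // plays the role of _oc
std::promise<int> acceleratorPromise;  // plays the role of _acceleratorPromise
std::future<int> acceleratorFuture = acceleratorPromise.get_future();
int actualNetwork = 0;

void WaitActualNetworkReady() {
    // std::future::get() may be called only once; call_once lets any number
    // of callers converge on a single fetch.
    std::call_once(onceFlag, [] { actualNetwork = acceleratorFuture.get(); });
}

int main() {
    std::thread producer([] { acceleratorPromise.set_value(42); });
    std::vector<std::thread> consumers;
    for (int i = 0; i < 4; ++i) consumers.emplace_back(WaitActualNetworkReady);
    for (auto& t : consumers) t.join();
    producer.join();
    std::cout << actualNetwork << "\n";  // prints 42; the future was fetched exactly once
    return 0;
}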
 
 void MultiDeviceExecutableNetwork::ScheduleToWorkerInferRequest(Task inferPipelineTask, DeviceName preferred_device) {
+    // AUTO work mode
+    if (_workModeIsAUTO) {
+        if (!preferred_device.empty()) {
+            // the preferred_device should be the selected device in AUTO work mode
+            if (preferred_device != _acceleratorDevice.deviceName) {
+                IE_THROW(NotFound) << "The preferred_device should be the selected device";
+            }
+
+            // if the device requested by the user is not ready yet, wait for it
+            WaitActualNetworkReady();
+
+            auto& idleWorkerRequests = _idleWorkerRequests[preferred_device];
+            if (!RunPipelineTask(inferPipelineTask, idleWorkerRequests, preferred_device)) {
+                // no vacant requests this time, storing the task to the respective queue
+                _inferPipelineTasksDeviceSpecific[preferred_device]->push(std::move(inferPipelineTask));
+            }
+        } else {
+            // _acceleratorDevice could be the same as _cpuDevice, such as AUTO:CPU
+            auto& idleWorkerRequests = IsActualNetworkReady()
+                                       ? _idleWorkerRequests[_acceleratorDevice.deviceName]
+                                       : _idleWorkerRequests[_cpuDevice.deviceName];
+            if (!RunPipelineTask(inferPipelineTask, idleWorkerRequests, preferred_device)) {
+                // no vacant requests this time, storing the task to the respective queue
+                _inferPipelineTasks.push(std::move(inferPipelineTask));
+            }
+        }
+        return;
+    }
+
+    // legacy MULTI work mode
     auto devices = [&] {
         std::lock_guard<std::mutex> lock(_mutex);
         return _devicePriorities;
@@ -115,16 +326,7 @@ void MultiDeviceExecutableNetwork::ScheduleToWorkerInferRequest(Task inferPipeli
     for (auto&& device : devices) {
         if (!preferred_device.empty() && (device.deviceName != preferred_device))
             continue;
-        WorkerInferRequest* workerRequestPtr = nullptr;
-        NotBusyWorkerRequests& idleWorkerRequests = _idleWorkerRequests[device.deviceName];
-        if (idleWorkerRequests.try_pop(workerRequestPtr)) {
-            IdleGuard idleGuard{workerRequestPtr, idleWorkerRequests};
-            _thisWorkerInferRequest = workerRequestPtr;
-            {
-                auto capturedTask = std::move(inferPipelineTask);
-                capturedTask();
-            }
-            idleGuard.Release();
+        if (RunPipelineTask(inferPipelineTask, _idleWorkerRequests[device.deviceName], preferred_device)) {
             return;
         }
     }
@@ -135,11 +337,33 @@ void MultiDeviceExecutableNetwork::ScheduleToWorkerInferRequest(Task inferPipeli
     _inferPipelineTasks.push(std::move(inferPipelineTask));
 }
 
+bool MultiDeviceExecutableNetwork::RunPipelineTask(Task& inferPipelineTask,
+                                                   NotBusyWorkerRequests& idleWorkerRequests,
+                                                   const DeviceName& preferred_device) {
+    WorkerInferRequest* workerRequestPtr = nullptr;
+    if (idleWorkerRequests.try_pop(workerRequestPtr)) {
+        IdleGuard idleGuard{workerRequestPtr, idleWorkerRequests};
+        _thisWorkerInferRequest = workerRequestPtr;
+        {
+            auto capturedTask = std::move(inferPipelineTask);
+            capturedTask();
+        }
+        idleGuard.Release();
+        return true;
+    }
+    return false;
+}
+
 void MultiDeviceExecutableNetwork::run(Task inferPipelineTask) {
     ScheduleToWorkerInferRequest(std::move(inferPipelineTask), _thisPreferredDeviceName);
 }
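RunPipelineTask() factors out the non-blocking handoff that both work modes share: try to pop an idle worker and run the task immediately, otherwise let the caller park the task in a queue. A reduced sketch of the same idea, with the plugin's ThreadSafeBoundedQueue replaced by a mutex-guarded std::queue (all names illustrative):

#include <functional>
#include <iostream>
#include <mutex>
#include <queue>

struct Worker { int id; };
std::queue<Worker*> idleWorkers;                  // stands in for _idleWorkerRequests
std::queue<std::function<void()>> pendingTasks;   // stands in for _inferPipelineTasks
std::mutex m;

bool RunPipelineTask(std::function<void()>& task) {
    std::unique_lock<std::mutex> lock(m);
    if (idleWorkers.empty())
        return false;                     // no vacant request: caller queues the task
    Worker* w = idleWorkers.front();
    idleWorkers.pop();
    lock.unlock();
    task();                               // would be dispatched to w's infer request
    std::lock_guard<std::mutex> g(m);
    idleWorkers.push(w);                  // worker returns to the idle list
    return true;
}

int main() {
    Worker w{0};
    idleWorkers.push(&w);
    std::function<void()> t = [] { std::cout << "scheduled\n"; };
    if (!RunPipelineTask(t))
        pendingTasks.push(std::move(t));  // parked until a worker frees up
    return 0;
}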
 
 MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
+    // this is necessary to guarantee that members are destroyed only after the futures have been fetched
+    if (_workModeIsAUTO && !IsActualNetworkReady()) {
+        WaitActualNetworkReady();
+    }
+
     {
         std::lock_guard<std::mutex> lock(_mutex);
         _devicePriorities.clear();
@@ -147,14 +371,19 @@ MultiDeviceExecutableNetwork::~MultiDeviceExecutableNetwork() {
     /* NOTE: The only threads that use `MultiDeviceExecutableNetwork` worker infer requests' threads.
      *       But AsyncInferRequest destructor should wait for all asynchronous tasks by the request
      */
-    for (auto&& networkValue : _networksPerDevice) {
+    for (auto&& idleWorker : _idleWorkerRequests) {
         // stop accepting any idle requests back (for re-scheduling)
-        _idleWorkerRequests.at(networkValue.first).set_capacity(0);
+        idleWorker.second.set_capacity(0);
     }
     _workerRequests.clear();
 }
 
 std::shared_ptr<InferenceEngine::RemoteContext> MultiDeviceExecutableNetwork::GetContext() const {
+    if (_workModeIsAUTO) {
+        WaitActualNetworkReady();
+        return _networkActualNeeded->GetContext();
+    }
+
     auto devices = [&] {
         std::lock_guard<std::mutex> lock(_mutex);
         return _devicePriorities;
@@ -177,6 +406,11 @@ InferenceEngine::IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::Create
     auto num = _numRequestsCreated++;
     size_t sum = 0;
     InferenceEngine::SoIInferRequestInternal request_to_share_blobs_with;
+
+    if (_workModeIsAUTO) {
+        return std::make_shared<MultiDeviceInferRequest>(networkInputs, networkOutputs, request_to_share_blobs_with);
+    }
+
     // borrowing device-specific blobs from the underlying requests for the device-agnostic, user-facing requests
     // this allows to potentially save on the data-copy later (if the requests are scheduled in the same order)
     for (const auto& device : _devicePrioritiesInitial) {
@@ -200,6 +434,10 @@ IInferRequestInternal::Ptr MultiDeviceExecutableNetwork::CreateInferRequest() {
 }
 
 void MultiDeviceExecutableNetwork::SetConfig(const std::map<std::string, std::string> &config) {
+    if (_workModeIsAUTO) {
+        IE_THROW(NotImplemented);
+    }
+
     auto priorities = config.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
     if (priorities == config.end() || config.size() > 1) {
         IE_THROW() << "The only config supported for the Network's SetConfig is MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES";
@@ -242,6 +480,14 @@ InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetConfig(const std::st
 }
 
 InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetMetric(const std::string &name) const {
+    if (_workModeIsAUTO) {
+        // FIXME: should we wait for the actual device? Meanwhile that would block inference; how to fix?
+        if (IsActualNetworkReady()) {
+            return _networkActualNeeded->GetMetric(name);
+        }
+        return _networkFirstReady->GetMetric(name);
+    }
+
     if (name == METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)) {
         unsigned int res = 0u;
         for (auto n : _networksPerDevice) {
@@ -274,5 +520,4 @@ InferenceEngine::Parameter MultiDeviceExecutableNetwork::GetMetric(const std::st
         IE_THROW() << "Unsupported Network metric: " << name;
     }
 }
-
 }  // namespace MultiDevicePlugin

diff --git a/inference-engine/src/multi_device/multi_device_exec_network.hpp b/inference-engine/src/multi_device/multi_device_exec_network.hpp
index a771ec2531caae..e79ac5179e302d 100644
--- a/inference-engine/src/multi_device/multi_device_exec_network.hpp
+++ b/inference-engine/src/multi_device/multi_device_exec_network.hpp
@@ -16,14 +16,21 @@
 #include
 #include
 #include
+#include <future>
+#include "ie_icore.hpp"
 
 #if (IE_THREAD == IE_THREAD_TBB || IE_THREAD == IE_THREAD_TBB_AUTO)
 # include
 #endif
+
 
 namespace MultiDevicePlugin {
 
+class MultiDeviceInferencePlugin;
+
 using DeviceName = std::string;
+using NetworkFuture = std::future<InferenceEngine::SoExecutableNetworkInternal>;
+using NetworkPromise = std::promise<InferenceEngine::SoExecutableNetworkInternal>;
 
 struct DeviceInformation {
     DeviceName deviceName;
@@ -105,10 +112,14 @@ class MultiDeviceExecutableNetwork : public InferenceEngine::ExecutableNetworkTh
     };
     using NotBusyWorkerRequests = ThreadSafeBoundedQueue<WorkerInferRequest*>;
 
-    explicit MultiDeviceExecutableNetwork(const DeviceMap<InferenceEngine::SoExecutableNetworkInternal>& networksPerDevice,
+    explicit MultiDeviceExecutableNetwork(const DeviceMap<InferenceEngine::SoExecutableNetworkInternal>& networksPerDevice,
                                           const std::vector<DeviceInformation>& networkDevices,
                                           const std::unordered_map<std::string, InferenceEngine::Parameter>& config,
                                           const bool needPerfCounters = false);
+    MultiDeviceExecutableNetwork(const std::string& modelPath,
+                                 const InferenceEngine::CNNNetwork& network,
+                                 const std::map<std::string, std::string>& config,
+                                 MultiDeviceInferencePlugin* plugin);
 
     void SetConfig(const std::map<std::string, std::string> &config) override;
     InferenceEngine::Parameter GetConfig(const std::string &name) const override;
@@ -129,15 +140,41 @@ class MultiDeviceExecutableNetwork : public InferenceEngine::ExecutableNetworkTh
     static thread_local const char* _thisPreferredDeviceName;
     mutable std::mutex _mutex;
     std::vector<DeviceInformation> _devicePriorities;
-    const std::vector<DeviceInformation> _devicePrioritiesInitial;
+    std::vector<DeviceInformation> _devicePrioritiesInitial;
     DeviceMap<InferenceEngine::SoExecutableNetworkInternal> _networksPerDevice;
     ThreadSafeQueue<InferenceEngine::Task> _inferPipelineTasks;
     DeviceMap<std::unique_ptr<ThreadSafeQueue<InferenceEngine::Task>>> _inferPipelineTasksDeviceSpecific;
     DeviceMap<NotBusyWorkerRequests> _idleWorkerRequests;
     DeviceMap<std::vector<WorkerInferRequest>> _workerRequests;
     std::unordered_map<std::string, InferenceEngine::Parameter> _config;
-    bool _needPerfCounters = false;
+    mutable bool _needPerfCounters = false;
     std::atomic_size_t _numRequestsCreated = {0};
+
+private:
+    void GenerateWorkers(const std::string& device, const InferenceEngine::SoExecutableNetworkInternal& executableNetwork);
+    bool IsActualNetworkReady() const;
+    void WaitActualNetworkReady() const;
+    void WaitFirstNetworkReady();
+    void SetPerfCounts() const;
+    static bool RunPipelineTask(InferenceEngine::Task& inferPipelineTask,
+                                NotBusyWorkerRequests& idleWorkerRequests,
+                                const DeviceName& preferred_device);
+
+private:
+    std::shared_ptr<InferenceEngine::ICore> core;
+    InferenceEngine::IStreamsExecutor::Ptr _executor;
+    MultiDeviceInferencePlugin* _multiPlugin;
+    InferenceEngine::SoExecutableNetworkInternal _networkFirstReady;
+    mutable InferenceEngine::SoExecutableNetworkInternal _networkActualNeeded;
+    NetworkFuture _cpuFuture;
+    NetworkPromise _cpuPromise;
+    mutable NetworkFuture _acceleratorFuture;
+    mutable NetworkPromise _acceleratorPromise;
+    mutable bool _alreadyActualNetwork = {false};
+    bool _workModeIsAUTO { false };
+    DeviceInformation _cpuDevice;
+    DeviceInformation _acceleratorDevice;
+    mutable std::once_flag _oc;
 };
 
 }  // namespace MultiDevicePlugin
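From the user's side the GetMetric() forwarding above is invisible; queries simply answer from whichever underlying network exists at that moment. A short hedged sketch of querying a metric through the public API (the model path is a placeholder):

#include <ie_core.hpp>
#include <ie_plugin_config.hpp>
#include <iostream>

int main() {
    InferenceEngine::Core core;
    auto network = core.ReadNetwork("model.xml");  // placeholder IR path
    auto exec = core.LoadNetwork(network, "AUTO");

    // In AUTO mode this call is forwarded to the actual (accelerator)
    // network if it is ready, otherwise to the first (CPU) network.
    auto optimal = exec.GetMetric(METRIC_KEY(OPTIMAL_NUMBER_OF_INFER_REQUESTS)).as<unsigned int>();
    std::cout << "optimal number of infer requests: " << optimal << "\n";
    return 0;
}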
diff --git a/inference-engine/src/multi_device/multi_device_plugin.cpp b/inference-engine/src/multi_device/multi_device_plugin.cpp
index b0bda135224c5a..18823972e3cdd6 100644
--- a/inference-engine/src/multi_device/multi_device_plugin.cpp
+++ b/inference-engine/src/multi_device/multi_device_plugin.cpp
@@ -212,32 +212,18 @@ IExecutableNetworkInternal::Ptr MultiDeviceInferencePlugin::LoadNetworkImpl(cons
     bool workModeAuto = workMode != fullConfig.end() && workMode->second == InferenceEngine::PluginConfigParams::YES;
     auto priorities = fullConfig.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
 
-    // not found device priorities for -d AUTO use case
+    // if workMode is AUTO
+    if (workModeAuto) {
+        CheckConfig(fullConfig);
+        return std::make_shared<MultiDeviceExecutableNetwork>(modelPath, network, fullConfig, this);
+    }
+
     if (priorities == fullConfig.end()) {
-        if (workModeAuto) {
-            std::string allDevices;
-            auto availableDevices = GetCore()->GetAvailableDevices();
-            if (availableDevices.empty()) {
-                IE_THROW(NotFound) << "No available device found";
-            }
-            for (auto&& device : availableDevices) {
-                allDevices += device;
-                allDevices += ((device == availableDevices[availableDevices.size()-1]) ? "" : ",");
-            }
-            metaDevices = ParseMetaDevices(allDevices, fullConfig);
-            multiNetworkConfig.insert({MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, allDevices});
-        } else {
-            IE_THROW() << "KEY_MULTI_DEVICE_PRIORITIES key is not set for " << GetName() << " device";
-        }
+        IE_THROW() << "KEY_MULTI_DEVICE_PRIORITIES key is not set for " << GetName() << " device";
     } else {  // for use case -d MULTI:xPU or -d AUTO:xPU
         metaDevices = ParseMetaDevices(priorities->second, fullConfig);
         multiNetworkConfig.insert(*priorities);
     }
-    // check if it is -d AUTO or -d AUTO:xPU use case
-    if (workModeAuto) {
-        auto targetDevice = SelectDevice(metaDevices, networkPrecision);
-        metaDevices = { targetDevice };
-    }
 
     DeviceMap<SoExecutableNetworkInternal> executableNetworkPerDevice;
     std::mutex load_mutex;
@@ -456,4 +442,43 @@ DeviceInformation MultiDeviceInferencePlugin::SelectDevice(const std::vector
+
+std::string MultiDeviceInferencePlugin::GetDeviceList(const std::map<std::string, std::string>& config) const {
+    std::string allDevices;
+
+    auto deviceListConfig = config.find(MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES);
+    if (deviceListConfig == config.end()) {
+        auto deviceList = GetCore()->GetAvailableDevices();
+        for (auto&& device : deviceList) {
+            allDevices += device;
+            allDevices += ((device == deviceList[deviceList.size()-1]) ? "" : ",");
+        }
+    } else {
+        allDevices = deviceListConfig->second;
+    }
+
+    if (allDevices.empty()) {
+        IE_THROW() << "Please, check environment: no supported devices can be used";
+    }
+
+    return allDevices;
+}
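GetDeviceList() falls back to enumerating every available device when no explicit priority list is configured. A standalone sketch of the same composition using the public Core API (output depends on the machine; the "MULTI"/"AUTO" filtering done elsewhere in the plugin is omitted here):

#include <ie_core.hpp>
#include <iostream>
#include <string>

int main() {
    InferenceEngine::Core core;
    std::string allDevices;
    auto deviceList = core.GetAvailableDevices();  // e.g. {"CPU", "GPU"}
    for (auto&& device : deviceList) {
        allDevices += device;
        allDevices += (device == deviceList.back()) ? "" : ",";
    }
    std::cout << allDevices << "\n";               // e.g. "CPU,GPU"
    return 0;
}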
"" : ","); + } + } else { + allDevices = deviceListConfig->second; + } + + if (allDevices.empty()) { + IE_THROW() << "Please, check environment due to no supported devices can be used"; + } + + return allDevices; +} + +void MultiDeviceInferencePlugin::CheckConfig(const std::map& config) { + for (auto&& kvp : config) { + if (kvp.first.find("AUTO_") == 0) { + continue; + } else if (kvp.first == PluginConfigParams::KEY_PERF_COUNT) { + if (kvp.second == PluginConfigParams::YES || + kvp.second == PluginConfigParams::NO) { + continue; + } else { + IE_THROW() << "Unsupported config value: " << kvp.second + << " for key: " << kvp.first; + } + } else if (supported_configKeys.end() == std::find(supported_configKeys.begin(), supported_configKeys.end(), kvp.first)) { + IE_THROW() << "Unsupported config key: " << kvp.first; + } + } +} + } // namespace MultiDevicePlugin diff --git a/inference-engine/src/multi_device/multi_device_plugin.hpp b/inference-engine/src/multi_device/multi_device_plugin.hpp index 4021c5ec9e1aea..3ca436a1ef11d2 100644 --- a/inference-engine/src/multi_device/multi_device_plugin.hpp +++ b/inference-engine/src/multi_device/multi_device_plugin.hpp @@ -36,6 +36,9 @@ class MultiDeviceInferencePlugin : public InferenceEngine::IInferencePlugin { std::vector ParseMetaDevices(const std::string & devicesRequestsCfg, const std::map & config) const; + std::string GetDeviceList(const std::map& config) const; + DeviceInformation SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32)); + protected: std::map GetSupportedConfig(const std::map& config, const MultiDevicePlugin::DeviceName & deviceName) const; @@ -45,7 +48,7 @@ class MultiDeviceInferencePlugin : public InferenceEngine::IInferencePlugin { InferenceEngine::CNNNetwork network, const std::map& config, const std::string &networkPrecision = METRIC_VALUE(FP32)); - DeviceInformation SelectDevice(const std::vector& metaDevices, const std::string& networkPrecision = METRIC_VALUE(FP32)); + static void CheckConfig(const std::map& config); }; } // namespace MultiDevicePlugin diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp index aec20bce2cda5f..e74d5b417e60cb 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp @@ -84,13 +84,6 @@ namespace { ::testing::ValuesIn(multiconf)), CorrectConfigAPITests::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, CorrectConfigAPITests, - ::testing::Combine( - ::testing::ValuesIn(netPrecisions), - ::testing::Values(CommonTestUtils::DEVICE_AUTO), - ::testing::ValuesIn(multiconf)), - CorrectConfigAPITests::getTestCaseName); - INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, IncorrectConfigTests, ::testing::Combine( ::testing::ValuesIn(netPrecisions), diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request/config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request/config.cpp index 7013c3096ddd75..c289a5831c005e 100644 --- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request/config.cpp +++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request/config.cpp @@ -62,18 +62,5 @@ namespace { 
diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp
index aec20bce2cda5f..e74d5b417e60cb 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/config.cpp
@@ -84,13 +84,6 @@ namespace {
                              ::testing::ValuesIn(multiconf)),
                      CorrectConfigAPITests::getTestCaseName);
 
-    INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, CorrectConfigAPITests,
-                             ::testing::Combine(
-                                     ::testing::ValuesIn(netPrecisions),
-                                     ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                                     ::testing::ValuesIn(multiconf)),
-                             CorrectConfigAPITests::getTestCaseName);
-
     INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, IncorrectConfigTests,
                              ::testing::Combine(
                                      ::testing::ValuesIn(netPrecisions),

diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request/config.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request/config.cpp
index 7013c3096ddd75..c289a5831c005e 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request/config.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/infer_request/config.cpp
@@ -62,18 +62,5 @@ namespace {
                                     ::testing::ValuesIn(MultiInConfigs)),
                              InferRequestConfigTest::getTestCaseName);
 
-    INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, InferRequestConfigTest,
-                             ::testing::Combine(
-                                     ::testing::Values(1u),
-                                     ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                                     ::testing::ValuesIn(multiConfigs)),
-                             InferRequestConfigTest::getTestCaseName);
-
-    INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests_, InferRequestConfigTest,
-                             ::testing::Combine(
-                                     ::testing::Values(1u),
-                                     ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                                     ::testing::ValuesIn(MultiInConfigs)),
-                             InferRequestConfigTest::getTestCaseName);
 }  // namespace

diff --git a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp
index c03c1a4f121cc2..f7656b81c760a7 100644
--- a/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp
+++ b/inference-engine/tests/functional/plugin/cpu/shared_tests_instances/behavior/test_plugin.cpp
@@ -32,6 +32,10 @@ namespace {
             {InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}
     };
 
+    const std::vector<std::map<std::string, std::string>> AutoConfigsInputOutput = {
+            {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_CPU}}
+    };
+
     const std::vector<std::map<std::string, std::string>> configsOutput = {
             {},
             {{InferenceEngine::PluginConfigParams::KEY_CPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::CPU_THROUGHPUT_AUTO}}
@@ -56,7 +60,7 @@ namespace {
             ::testing::Combine(
                     ::testing::ValuesIn(netPrecisions),
                     ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                    ::testing::ValuesIn(MultiConfigsInputOutput)),
+                    ::testing::ValuesIn(AutoConfigsInputOutput)),
             BehaviorTestOutput::getTestCaseName);
 
     INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, BehaviorTests,
@@ -98,7 +102,7 @@ namespace {
             ::testing::Combine(
                     ::testing::ValuesIn(netPrecisions),
                     ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                    ::testing::ValuesIn(MultiConfigsInputOutput)),
+                    ::testing::ValuesIn(AutoConfigsInputOutput)),
             BehaviorTestInput::getTestCaseName);
 
 }  // namespace

diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp
index 22a13191dc5298..f4fa0f279f2c64 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/config.cpp
@@ -90,6 +90,13 @@ namespace {
             {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}}
     };
 
+    const std::vector<std::map<std::string, std::string>> autoConfigs = {
+            {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}},
+            {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
+              CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}}
+    };
+
+
     INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, CorrectConfigAPITests,
             ::testing::Combine(
                     ::testing::ValuesIn(netPrecisions),
@@ -115,7 +122,7 @@ namespace {
             ::testing::Combine(
                     ::testing::ValuesIn(netPrecisions),
                     ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                    ::testing::ValuesIn(multiconf)),
+                    ::testing::ValuesIn(autoConfigs)),
             CorrectConfigAPITests::getTestCaseName);
 
     INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, IncorrectConfigAPITests,
diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request/callback.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request/callback.cpp
index 90a22c2435cb86..dfaa591dd96376 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request/callback.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request/callback.cpp
@@ -14,6 +14,11 @@ const std::vector<std::map<std::string, std::string>> multiConfigs = {
         {{ MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , CommonTestUtils::DEVICE_GPU}}
 };
 
+const std::vector<std::map<std::string, std::string>> autoConfigs = {
+        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}},
+        {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}}
+};
+
 INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, InferRequestCallbackTests,
         ::testing::Combine(
             ::testing::Values(CommonTestUtils::DEVICE_GPU),
@@ -27,8 +32,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Multi_BehaviorTests, InferRequestCallbackTests,
         InferRequestCallbackTests::getTestCaseName);
 
 INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, InferRequestCallbackTests,
-        ::testing::Combine(
-            ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-            ::testing::ValuesIn(multiConfigs)),
-        InferRequestCallbackTests::getTestCaseName);
+        ::testing::Combine(
+            ::testing::Values(CommonTestUtils::DEVICE_AUTO),
+            ::testing::ValuesIn(autoConfigs)),
+        InferRequestCallbackTests::getTestCaseName);
 }  // namespace

diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request/wait.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request/wait.cpp
index 07fe3ddd855a3a..41da3069a871ea 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request/wait.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/infer_request/wait.cpp
@@ -13,6 +13,12 @@ namespace {
         {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES, CommonTestUtils::DEVICE_GPU}}
     };
 
+    const std::vector<std::map<std::string, std::string>> autoConfigs = {
+            {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}},
+            {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
+              CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}}
+    };
+
     INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, InferRequestWaitTests,
             ::testing::Combine(
                 ::testing::Values(CommonTestUtils::DEVICE_GPU),
@@ -28,7 +34,7 @@ namespace {
     INSTANTIATE_TEST_SUITE_P(smoke_Auto_BehaviorTests, InferRequestWaitTests,
             ::testing::Combine(
                 ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                ::testing::ValuesIn(configs)),
+                ::testing::ValuesIn(autoConfigs)),
             InferRequestWaitTests::getTestCaseName);
 
-}  // namespace
\ No newline at end of file
+}  // namespace

diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp
index c6a17bb4bf7761..950425675bc2d4 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/preprocessing/set_preprocess.cpp
@@ -22,6 +22,12 @@ namespace {
         {{ InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}}
     };
 
+    const std::vector<std::map<std::string, std::string>> autoConfigs = {
+            {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}},
+            {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
+              CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}}
+    };
+
     INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, InferRequestPreprocessTest,
             ::testing::Combine(
                     ::testing::ValuesIn(netPrecisions),
@@ -40,7 +46,7 @@ namespace {
             ::testing::Combine(
                     ::testing::ValuesIn(netPrecisions),
                     ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                    ::testing::ValuesIn(multiConfigs)),
+                    ::testing::ValuesIn(autoConfigs)),
             InferRequestPreprocessTest::getTestCaseName);
 
     const std::vector<InferenceEngine::Precision> ioPrecisions = {
@@ -85,4 +91,4 @@ namespace {
                     ::testing::ValuesIn(configs)),
             InferRequestPreprocessDynamicallyInSetBlobTest::getTestCaseName);
 
-}  // namespace
\ No newline at end of file
+}  // namespace

diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp
index 98069d07303168..bfe1d09c36be7d 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/test_plugin.cpp
@@ -31,6 +31,12 @@ namespace {
         {{ MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , CommonTestUtils::DEVICE_GPU}}
     };
 
+    const std::vector<std::map<std::string, std::string>> autoConfigs = {
+            {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}},
+            {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
+              CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}}
+    };
+
     const std::vector<std::map<std::string, std::string>> configsInput = {
             {},
             {{InferenceEngine::PluginConfigParams::KEY_GPU_THROUGHPUT_STREAMS, InferenceEngine::PluginConfigParams::GPU_THROUGHPUT_AUTO}}
@@ -65,7 +71,7 @@ namespace {
             ::testing::Combine(
                     ::testing::ValuesIn(netPrecisions),
                     ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                    ::testing::ValuesIn(MultiConfigsInputOutput)),
+                    ::testing::ValuesIn(autoConfigs)),
             BehaviorTestOutput::getTestCaseName);
 
     INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, BehaviorTests,
@@ -86,7 +92,7 @@ namespace {
             ::testing::Combine(
                     ::testing::Values(InferenceEngine::Precision::FP32),
                     ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                    ::testing::ValuesIn(MultiConfigs)),
+                    ::testing::ValuesIn(autoConfigs)),
             BehaviorTests::getTestCaseName);
 
     INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, BehaviorTestInput,
@@ -107,7 +113,7 @@ namespace {
             ::testing::Combine(
                     ::testing::ValuesIn(netPrecisions),
                     ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                    ::testing::ValuesIn(MultiConfigsInputOutput)),
+                    ::testing::ValuesIn(autoConfigs)),
             BehaviorTestInput::getTestCaseName);
 
 }  // namespace

diff --git a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/version.cpp b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/version.cpp
index c02a209e9d59a9..fe7bbfa5c099f1 100644
--- a/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/version.cpp
+++ b/inference-engine/tests/functional/plugin/gpu/shared_tests_instances/behavior/version.cpp
@@ -14,6 +14,12 @@ namespace {
         {{ MULTI_CONFIG_KEY(DEVICE_PRIORITIES) , CommonTestUtils::DEVICE_GPU}}
     };
 
+    const std::vector<std::map<std::string, std::string>> autoConfigs = {
+            {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES , CommonTestUtils::DEVICE_GPU}},
+            {{InferenceEngine::MultiDeviceConfigParams::KEY_MULTI_DEVICE_PRIORITIES ,
+              CommonTestUtils::DEVICE_GPU + std::string(",") + CommonTestUtils::DEVICE_CPU}}
+    };
+
     const std::vector<std::map<std::string, std::string>> Heteroconfigs = {
             {{ HETERO_CONFIG_KEY(DUMP_GRAPH_DOT) , CommonTestUtils::DEVICE_GPU}}
     };
@@ -36,7 +42,7 @@ namespace {
             ::testing::Combine(
                     ::testing::Values(InferenceEngine::Precision::FP32),
                     ::testing::Values(CommonTestUtils::DEVICE_AUTO),
-                    ::testing::ValuesIn(Multiconfigs)),
+                    ::testing::ValuesIn(autoConfigs)),
             VersionTest::getTestCaseName);
 
     INSTANTIATE_TEST_SUITE_P(smoke_Hetero_BehaviorTests, VersionTest,