diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
index a335153e28a48f..810f1c34b0aa78 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.cpp
@@ -347,6 +347,8 @@ void MKLDNNGraph::InitGraph() {
         graphNode->cleanup();
     }
 #endif
+    ExtractConstantNodes();
+
     ExecuteConstantNodesOnly();
 }
 
@@ -390,6 +392,16 @@ void MKLDNNGraph::InitOptimalPrimitiveDescriptors() {
     }
 }
 
+void MKLDNNGraph::ExtractConstantNodes() {
+    OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExtractConstantNodes");
+    for (auto& graphNode : graphNodes) {
+        if (graphNode->isConstant())
+            constantGraphNodes.emplace_back(graphNode);
+        else
+            mutableGraphNodes.emplace_back(graphNode);
+    }
+}
+
 void MKLDNNGraph::ExecuteConstantNodesOnly() {
     OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::MKLDNN_LT, "MKLDNNGraph::ExecuteConstantNodesOnly");
     mkldnn::stream stream(eng);
@@ -418,10 +430,7 @@ void MKLDNNGraph::ExecuteConstantNodesOnly() {
         return std::make_tuple(hasExternalInvalidEdges, hasLocalAllocatedEdges, outputs);
     };
 
-    for (auto &graphNode : graphNodes) {
-        if (!graphNode->isConstant())
-            continue;
-
+    for (auto &graphNode : constantGraphNodes) {
         if (weightsCache) {
             auto sharedOutputs = acquireSharedOutputs(graphNode);
 
@@ -810,24 +819,30 @@ void MKLDNNGraph::Infer(MKLDNNInferRequest* request, int batch) {
 
     ENABLE_CPU_DEBUG_CAP(NodeDumper nd(config.debugCaps, infer_count));
 
-    for (int i = 0; i < graphNodes.size(); i++) {
-        if (request != nullptr) {
+#ifdef CPU_DEBUG_CAPS
+    for (const auto& node : constantGraphNodes) {
+        if (request != nullptr)
             request->ThrowIfCanceled();
-        }
 
-        PERF(graphNodes[i]);
+        ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(node));
+        ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(node));
+    }
+#endif
+
+    for (const auto& node : mutableGraphNodes) {
+        PERF(config.collectPerfCounters, node);
+        if (request != nullptr)
+            request->ThrowIfCanceled();
 
         if (batch > 0)
-            graphNodes[i]->setDynamicBatchLim(batch);
+            node->setDynamicBatchLim(batch);
 
-        ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(graphNodes[i]));
+        ENABLE_CPU_DEBUG_CAP(nd.dumpInputBlobs(node));
 
-        if (!graphNodes[i]->isConstant()) {
-            OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, graphNodes[i]->profiling.execute);
-            graphNodes[i]->execute(stream);
-        }
+        OV_ITT_SCOPED_TASK(itt::domains::MKLDNNPlugin, node->profiling.execute);
+        node->execute(stream);
 
-        ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(graphNodes[i]));
+        ENABLE_CPU_DEBUG_CAP(nd.dumpOutputBlobs(node));
     }
 
     if (infer_count != -1) infer_count++;
diff --git a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
index ae73d8da2999e4..2c7db727151dba 100644
--- a/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
+++ b/inference-engine/src/mkldnn_plugin/mkldnn_graph.h
@@ -218,6 +218,7 @@ class MKLDNNGraph {
     void Allocate();
     void AllocateWithReuse();
     void CreatePrimitives();
+    void ExtractConstantNodes();
     void ExecuteConstantNodesOnly();
 
     friend class MKLDNNInferRequest;
@@ -225,6 +226,11 @@ class MKLDNNGraph {
    friend InferenceEngine::CNNNetwork dump_graph_as_ie_ngraph_net(const MKLDNNGraph &graph);
 
 private:
+    // These node pointers (taken from graphNodes) avoid repeatedly checking for
+    // constant nodes in the ExecuteConstantNodesOnly and Infer methods
+    std::vector<MKLDNNNodePtr> constantGraphNodes;
+    std::vector<MKLDNNNodePtr> mutableGraphNodes;
+
     void EnforceBF16();
 };
 
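The graph change above boils down to partitioning the node list once, so the per-inference loop no longer branches on isConstant() for every node. Below is a minimal, self-contained sketch of that idea; Node, NodePtr and Graph are illustrative stand-ins, not the plugin's MKLDNNNode/MKLDNNGraph types.

```cpp
#include <iostream>
#include <memory>
#include <vector>

// Stand-in for a graph node: just enough to show the constant/mutable split.
struct Node {
    bool constant;
    bool isConstant() const { return constant; }
    void execute() const { std::cout << "execute mutable node\n"; }
};
using NodePtr = std::shared_ptr<Node>;

struct Graph {
    std::vector<NodePtr> graphNodes;
    std::vector<NodePtr> constantGraphNodes;  // executed once, at init time
    std::vector<NodePtr> mutableGraphNodes;   // executed on every Infer()

    // Mirrors ExtractConstantNodes(): split the node list a single time.
    void extractConstantNodes() {
        for (const auto& node : graphNodes) {
            if (node->isConstant())
                constantGraphNodes.emplace_back(node);
            else
                mutableGraphNodes.emplace_back(node);
        }
    }

    // The hot loop now iterates only over mutable nodes; no per-node check.
    void infer() const {
        for (const auto& node : mutableGraphNodes)
            node->execute();
    }
};

int main() {
    Graph g;
    g.graphNodes = {std::make_shared<Node>(Node{true}),
                    std::make_shared<Node>(Node{false})};
    g.extractConstantNodes();
    g.infer();  // runs only the single mutable node
}
```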
diff --git a/inference-engine/src/mkldnn_plugin/perf_count.h b/inference-engine/src/mkldnn_plugin/perf_count.h
index 3fce79b5e689d0..0f230c4c76f301 100644
--- a/inference-engine/src/mkldnn_plugin/perf_count.h
+++ b/inference-engine/src/mkldnn_plugin/perf_count.h
@@ -46,4 +46,5 @@ class PerfHelper {
 
 }  // namespace MKLDNNPlugin
 
-#define PERF(_counter) PerfHelper __helper##__counter (_counter->PerfCounter());
+#define GET_PERF(_counter) std::unique_ptr<PerfHelper>(new PerfHelper(_counter->PerfCounter()))
+#define PERF(_need, _counter) auto pc = _need ? GET_PERF(_counter) : nullptr;
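The perf_count.h rework makes per-node performance counting opt-in at the call site: PERF(config.collectPerfCounters, node) heap-allocates the RAII helper only when collection is enabled, and the unique_ptr stops the measurement when it goes out of scope. The sketch below mirrors that macro shape with stub PerfCount/PerfHelper types; it is a simplified stand-in, not the plugin's implementation (in particular, it passes the counter directly instead of calling _counter->PerfCounter()).

```cpp
#include <chrono>
#include <cstdio>
#include <memory>

// Stub counter: accumulates elapsed time reported by PerfHelper.
struct PerfCount {
    std::chrono::microseconds total{0};
};

// Stub RAII helper: starts timing on construction, accumulates on destruction.
class PerfHelper {
    PerfCount& counter;
    std::chrono::steady_clock::time_point start;
public:
    explicit PerfHelper(PerfCount& c) : counter(c), start(std::chrono::steady_clock::now()) {}
    ~PerfHelper() {
        counter.total += std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::steady_clock::now() - start);
    }
};

// Same shape as the patched macros: the helper is created only when needed,
// and the unique_ptr ties the measurement to the enclosing scope.
#define GET_PERF(_counter) std::unique_ptr<PerfHelper>(new PerfHelper(_counter))
#define PERF(_need, _counter) const auto perfGuard = (_need) ? GET_PERF(_counter) : nullptr;

int main() {
    PerfCount nodeCounter;
    const bool collectPerfCounters = true;  // e.g. config.collectPerfCounters
    {
        PERF(collectPerfCounters, nodeCounter);
        // ... node->execute(stream) would run here ...
    }
    std::printf("accumulated: %lld us\n", static_cast<long long>(nodeCounter.total.count()));
}
```

With collectPerfCounters set to false, the ternary yields a null unique_ptr and no PerfHelper is ever constructed, so runs with counting disabled pay essentially nothing.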