Commit e98b2c1

Splitting the Transformation into general and CPU-specific parts.
Now, hopefully, this fully mimics the 2021.3 baseline c4df94d (before the experiments), as the streams-number reduction (as well as the early exit on GRU/LSTM/TensorIterator) is disabled.
myshevts committed Jul 5, 2021
1 parent 050a415 commit e98b2c1
Showing 2 changed files with 38 additions and 25 deletions.
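
For orientation, the split described in the commit message boils down to the structure below. This is a hand-condensed sketch of the two resulting functions (the names and signatures match the patch; the pass bodies are elided and the comments are mine), not the literal plugin code:

    static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function> nGraphFunc,
                                                   const bool _enableLPT) {
        // device-agnostic nGraph passes: common transformations, optional LPT,
        // and the post-LPT pass manager (details elided in this sketch)
        ngraph::pass::Manager manager;
        manager.register_pass<ngraph::pass::InitNodeInfo>();
        // ...
        manager.run_passes(nGraphFunc);
    }

    static void Transformation(CNNNetwork& clonedNetwork, const bool _enableLPT) {
        auto nGraphFunc = clonedNetwork.getFunction();
        TransformationUpToCPUSpecificOpSet(nGraphFunc, _enableLPT);  // general part
        ConvertToCPUSpecificOpset(nGraphFunc);                       // CPU-specific part
    }
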
61 changes: 37 additions & 24 deletions inference-engine/src/mkldnn_plugin/mkldnn_plugin.cpp
@@ -113,9 +113,7 @@ Engine::~Engine() {
ExecutorManager::getInstance()->clear("CPUCallbackExecutor");
}

static void Transformation(CNNNetwork& clonedNetwork, const bool _enableLPT) {
auto nGraphFunc = clonedNetwork.getFunction();

static void TransformationUpToCPUSpecificOpSet(std::shared_ptr<ngraph::Function> nGraphFunc, const bool _enableLPT) {
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();

@@ -366,7 +364,11 @@ static void Transformation(CNNNetwork& clonedNetwork, const bool _enableLPT) {
});

postLPTPassManager.run_passes(nGraphFunc);
}

static void Transformation(CNNNetwork& clonedNetwork, const bool _enableLPT) {
auto nGraphFunc = clonedNetwork.getFunction();
TransformationUpToCPUSpecificOpSet(nGraphFunc, _enableLPT);
ConvertToCPUSpecificOpset(nGraphFunc);
}

@@ -422,27 +424,35 @@ Engine::NetworkPerfStats Engine::NetworkMemBandwidthTolerance(const InferenceEng
// Traverse nGraph Function in topological order
for (auto & node : nGraphFunc->get_ordered_ops()) {
// todo : bias data size (always fp)
if (std::strcmp("MatMul", node->get_type_info().name) && std::strcmp("Convolution", node->get_type_info().name)
&& std::strcmp("ConvolutionBackpropData", node->get_type_info().name)) {
int inputs_data_size_bytes = 0;
for (int i = 0; i < node->get_input_size(); i++) {
auto type = node->input_value(i).get_element_type();
const bool isINT8 = isLowPrecision(type); // bf16 tbd
const bool isBF16 = isHalfPrecision(type); // bf16 tbd
const int data_type_size = isINT8 ? 1 : isBF16 ? 2 : 4;
ngraph::Input<ngraph::Node> input = node->input(i);
const auto shapeInput = input.get_shape();
const auto non_const = !get_constant_from_source(node->input_value(i));
const auto dataSizeInput = std::accumulate(shapeInput.begin(), shapeInput.end(), 1,
std::multiplies<int>());
const auto not_amortized = non_const || (dataSizeInput * data_type_size) > L3_cache_size;
inputs_data_size_bytes += not_amortized * (dataSizeInput * data_type_size);
const auto node_name = node->get_type_info().name;
if (std::strcmp("MatMul", node_name) && std::strcmp("Convolution", node_name)
&& std::strcmp("ConvolutionBackpropData", node_name)) {
int inputs_data_size_bytes = 0;
if (!std::strcmp("GRUSequence", node_name)
|| !std::strcmp("TensorIterator", node_name)
|| !std::strcmp("LSTMSequence", node_name)) {
// RNNs and the like are not considered
std::cout << "TYPE: " << node_name << " Name: " << node->get_friendly_name()
<< " considering non-supported! falling back..." << std::endl;
}
for (int i = 0; i < node->get_input_size(); i++) {
auto type = node->input_value(i).get_element_type();
const bool isINT8 = isLowPrecision(type); // bf16 tbd
const bool isBF16 = isHalfPrecision(type); // bf16 tbd
const int data_type_size = isINT8 ? 1 : isBF16 ? 2 : 4;
ngraph::Input<ngraph::Node> input = node->input(i);
const auto shapeInput = input.get_shape();
const auto non_const = !get_constant_from_source(node->input_value(i));
const auto dataSizeInput = std::accumulate(shapeInput.begin(), shapeInput.end(), 1,
std::multiplies<int>());
const auto not_amortized = non_const || (dataSizeInput * data_type_size) > L3_cache_size;
inputs_data_size_bytes += not_amortized * (dataSizeInput * data_type_size);
}
// no need to track outputs, as these are inputs to some layers
const auto factor = memLimitedFactor(inputs_data_size_bytes, 1 /*already in bytes*/);
if (factor < worst_case_all) {
worst_case_all = factor;
std::cout << "TYPE: " << node->get_type_info().name << " Name: " << node->get_friendly_name()
std::cout << "TYPE: " << node_name << " Name: " << node->get_friendly_name()
<< " inputs_data_size_bytes " << inputs_data_size_bytes << ", factor: " << factor << std::endl;
}
continue;
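
For every op other than MatMul, Convolution and ConvolutionBackpropData, the branch above (GRU/LSTM/TensorIterator sequences are only logged, since the early exit is disabled per the commit message) sums the input bytes that are expected to actually travel through memory: non-constant inputs always count, constant inputs only once they no longer fit into the L3 cache. A self-contained sketch of that accounting, in plain C++ with a hypothetical NodeInput stand-in for the ngraph input (illustration only, not the plugin code):

    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <vector>

    // Hypothetical stand-in for one node input: shape, element size in bytes, constness.
    struct NodeInput {
        std::vector<int64_t> shape;
        int  element_size;    // 1 for int8, 2 for bf16, 4 for fp32
        bool is_constant;     // true if the value folds to a constant
    };

    // Bytes assumed to hit memory bandwidth for one node: non-constant inputs
    // always count; constant inputs count only if they do not fit into L3.
    int64_t InputsDataSizeBytes(const std::vector<NodeInput>& inputs, int64_t l3_cache_size) {
        int64_t total = 0;
        for (const auto& in : inputs) {
            const int64_t elements = std::accumulate(in.shape.begin(), in.shape.end(),
                                                     int64_t{1}, std::multiplies<int64_t>());
            const int64_t bytes = elements * in.element_size;
            const bool not_amortized = !in.is_constant || bytes > l3_cache_size;
            if (not_amortized)
                total += bytes;
        }
        return total;
    }

The resulting byte count is then fed to memLimitedFactor(), and the smallest factor seen across all such nodes is kept as the worst case.
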
@@ -455,9 +465,9 @@ Engine::NetworkPerfStats Engine::NetworkMemBandwidthTolerance(const InferenceEng
const int data_type_size = isINT8 ? 1 : isBF16 ? 2 : 4;

int dataSizeInput = 0, dataSizeOutput = 0;
std::cout << "Type: " << node->get_type_info().name << " Name: "
std::cout << "Type: " << node_name << " Name: "
<< node->get_friendly_name();
if (!std::strcmp("MatMul", node->get_type_info().name)) {
if (!std::strcmp("MatMul", node_name)) {
ngraph::Input<ngraph::Node> input0 = node->input(0);
ngraph::Input<ngraph::Node> input1 = node->input(1);
ngraph::Output<ngraph::Node> output = node->output(0);
@@ -511,7 +521,7 @@ Engine::NetworkPerfStats Engine::NetworkMemBandwidthTolerance(const InferenceEng
// << " L2_cache_size: " << L2_cache_size << " L3_cache_size: " << L3_cache_size
// << " FACTOR: " << factor << std::endl;
}
} else if (!std::strcmp("Convolution", node->get_type_info().name)) {
} else if (!std::strcmp("Convolution", node_name)) {
// Check that input and output shapes are fully defined (not dynamic)
ngraph::Input<ngraph::Node> input = node->input(0);
ngraph::Output<ngraph::Node> output = node->output(0);
@@ -556,7 +566,7 @@ Engine::NetworkPerfStats Engine::NetworkMemBandwidthTolerance(const InferenceEng
<< ", dataSize: " << dataSizeInput + dataSizeOutput
<< ", L2_cache_size: " << L2_cache_size << " FACTOR: " << factor << std::endl;
}
} else if (!std::strcmp("ConvolutionBackpropData", node->get_type_info().name)) {
} else if (!std::strcmp("ConvolutionBackpropData", node_name)) {
// Check that input and output shapes are fully defined (not dynamic)
ngraph::Input<ngraph::Node> input = node->input(0);
ngraph::Output<ngraph::Node> output = node->output(0);
@@ -638,7 +648,8 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
const auto& lptProp = config.find(InferenceEngine::PluginConfigInternalParams::KEY_LP_TRANSFORMS_MODE);
const bool enableLPT = (lptProp != config.end() && lptProp->second == PluginConfigParams::YES) /* enabled in the orig_config*/
|| Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */;
Transformation(clonedNetwork, enableLPT);
auto nGraphFunc = clonedNetwork.getFunction();
TransformationUpToCPUSpecificOpSet(nGraphFunc, enableLPT);

// Here the OV perf modes are turned into specific settings (as we need the network for better params selection)
//const auto& mode = config.find(PluginConfigParams::KEY_OV_PERFORMANCE_MODE);
@@ -696,6 +707,8 @@ Engine::LoadExeNetworkImpl(const InferenceEngine::CNNNetwork &network, const std
//}
//}
//}
ConvertToCPUSpecificOpset(nGraphFunc);

// update the props after the perf mode translated to configs
// TODO: Clarify the behavior of SetConfig method. Skip eng_config or not?
Config conf = engConfig;
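
The practical effect of the split shows up in LoadExeNetworkImpl: the general passes now run first, the still-generic nGraph function can be inspected to turn the OV performance modes into concrete settings, and only then is the graph lowered to the CPU-specific op set. Roughly, condensed from the hunks above (the analysis step is paraphrased, not the exact plugin code):

    auto nGraphFunc = clonedNetwork.getFunction();
    TransformationUpToCPUSpecificOpSet(nGraphFunc, enableLPT);   // general passes only

    // ... inspect nGraphFunc here, e.g. via NetworkMemBandwidthTolerance(), and map
    //     the OV performance mode onto concrete stream/config settings ...

    ConvertToCPUSpecificOpset(nGraphFunc);                       // CPU-specific op set last
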
2 changes: 1 addition & 1 deletion inference-engine/src/mkldnn_plugin/mkldnn_plugin.h
@@ -52,7 +52,7 @@ class Engine : public InferenceEngine::IInferencePlugin {
static constexpr float memThresholdNotLimited = 1.0f;
static constexpr float memThresholdAssumeLimited = 0.5f;
static constexpr float memThresholdAssumeLimitedAVX512 = memThresholdAssumeLimited/2;
static constexpr float memThresholdAssumeLimitedMuch = memThresholdAssumeLimited/4;
static constexpr float memThresholdAssumeLimitedMuch = memThresholdAssumeLimited/8;
static constexpr float memThresholdUnknown = FLT_MAX;

static constexpr float memLimitedRatioThresholdAVX512 = 0.10;
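
The header-side change only tightens one constant: with memThresholdAssumeLimited = 0.5f, memThresholdAssumeLimitedMuch drops from 0.5/4 = 0.125 to 0.5/8 = 0.0625, presumably making the "assume limited much" classification twice as strict. A small compile-time illustration of the arithmetic (not part of the patch):

    // Reproduces the constants from mkldnn_plugin.h to show the effect of /4 -> /8.
    constexpr float memThresholdAssumeLimited        = 0.5f;
    constexpr float oldMemThresholdAssumeLimitedMuch = memThresholdAssumeLimited / 4;  // 0.125
    constexpr float newMemThresholdAssumeLimitedMuch = memThresholdAssumeLimited / 8;  // 0.0625
    static_assert(newMemThresholdAssumeLimitedMuch == oldMemThresholdAssumeLimitedMuch / 2,
                  "the 'much limited' threshold is halved by this commit");
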
