Commit
psroi unrolled to the primary state; code clean (#12)
yury-intel authored and maxnick committed Nov 19, 2020
1 parent a0c883b commit 2624601
Showing 3 changed files with 65 additions and 79 deletions.
64 changes: 37 additions & 27 deletions inference-engine/src/mkldnn_plugin/bf16transformer.cpp
@@ -66,33 +66,7 @@ void BF16Transformer::convertToBFloat16(InferenceEngine::CNNNetwork &network) {
iter->insData[0].lock()->getPrecision() == Precision::FP32) {
iter->insData[0].lock()->setPrecision(Precision::BF16);
}
if (_initbf16.find(iter->type) != _initbf16.end()) {
for (size_t o = 0; o < iter->insData.size(); o++) {
if (inputs.find(iter->insData[o].lock()->getName()) != inputs.end()) {
std::string iterType = iter->type;
std::transform(iterType.begin(), iterType.end(), iterType.begin(),
[](unsigned char c){ return std::tolower(c); });
if (iterType == "convolution") {
// TODO: have to be removed after adding suitable implementation for convolution
break;
}
if (iter->insData[o].lock()->getPrecision() != Precision::FP32 &&
iter->insData[o].lock()->getPrecision() != Precision::BF16) {
break;
}
// insert convert
std::string layerName = iter->insData[o].lock()->getName() + "_" + std::to_string(o);
LayerParams cnnLayerParams{ layerName, "Convert", Precision::FP32 };
auto lay = new CNNLayer(cnnLayerParams);
std::map<std::string, std::string> par = {{"name", layerName}, {"type", "Convert"}, {"precision", "FP32"}};
lay->params = par;
CNNLayerPtr convertLayer(lay);
BF16Transformer::addLayerToCNNNetworkAfterData(iter->insData[o].lock(), convertLayer, iter->name, network);
// set conv input as bf
iter->insData[o].lock()->setPrecision(Precision::BF16);
}
}
}

for (size_t o = 0; o < iter->outData.size(); o++) {
if (inputs.find(iter->outData[o]->getName()) == inputs.end()
&& outputs.find(iter->outData[o]->getName()) == outputs.end()
@@ -102,6 +76,10 @@ void BF16Transformer::convertToBFloat16(InferenceEngine::CNNNetwork &network) {
}
}
}

// insert convert after input if necessary
insertConvertAfterInput(network);

// convert all edges back to FP32 on demand
optimizeToFloat(network);
}
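
For orientation, a minimal driver sketch of how the plugin side might invoke this transformer. Only BF16Transformer and convertToBFloat16 come from the diff above; the enforceBF16 wrapper, the header path, and the MKLDNNPlugin namespace are assumptions for illustration.

#include "bf16transformer.h"

// Hypothetical entry point: apply the bf16 transformation to a loaded network.
void enforceBF16(InferenceEngine::CNNNetwork &network) {
    MKLDNNPlugin::BF16Transformer transformer;
    // Marks supported edges as BF16, inserts Convert layers after inputs where
    // needed (insertConvertAfterInput), then rolls unsupported edges back to
    // FP32 (optimizeToFloat).
    transformer.convertToBFloat16(network);
}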
@@ -363,4 +341,36 @@ void BF16Transformer::addLayerToCNNNetworkAfterData(
} else {
THROW_IE_EXCEPTION << "Invalid argument";
}
}

void BF16Transformer::insertConvertAfterInput(InferenceEngine::CNNNetwork &network) {
std::vector<CNNLayerPtr> sortedLayers = CNNNetSortTopologically(network);
InputsDataMap inputs = network.getInputsInfo();
OutputsDataMap outputs = network.getOutputsInfo();
for (auto iter : sortedLayers) {
if (_initbf16.find(iter->type) != _initbf16.end()) {
for (size_t o = 0; o < iter->insData.size(); o++) {
if (inputs.find(iter->insData[o].lock()->getName()) != inputs.end()) {
std::string iterType = iter->type;
if (CaselessEq<std::string>()(iterType, "convolution")) {
// TODO: remove this once a suitable bf16 implementation is added for convolution
break;
}
// insert convert
std::string layerName = iter->insData[o].lock()->getName() + "_" + std::to_string(o);
LayerParams cnnLayerParams{layerName, "Convert", Precision::FP32};
auto lay = std::make_shared<InferenceEngine::CNNLayer>(cnnLayerParams);
std::map<std::string, std::string> par = {{"name", layerName},
{"type", "Convert"},
{"precision", "FP32"}};
lay->params = par;
CNNLayerPtr convertLayer(lay);
BF16Transformer::addLayerToCNNNetworkAfterData(iter->insData[o].lock(), convertLayer, iter->name,
network);
// mark the consuming layer's input data as bf16
iter->insData[o].lock()->setPrecision(Precision::BF16);
}
}
}
}
}
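
The deleted block above lowercased iter->type by hand with std::transform before comparing, while the new helper uses InferenceEngine's CaselessEq. A standalone sketch of the same comparison using only the standard library; caselessEq is a hypothetical stand-in for the IE type, not its actual implementation.

#include <algorithm>
#include <cctype>
#include <string>

// Case-insensitive string equality, equivalent in spirit to
// InferenceEngine::details::CaselessEq<std::string>.
inline bool caselessEq(const std::string &a, const std::string &b) {
    return a.size() == b.size() &&
           std::equal(a.begin(), a.end(), b.begin(),
                      [](unsigned char l, unsigned char r) {
                          return std::tolower(l) == std::tolower(r);
                      });
}

// Usage: caselessEq(iter->type, "convolution") replaces the
// std::transform + std::tolower sequence removed above.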
9 changes: 8 additions & 1 deletion inference-engine/src/mkldnn_plugin/bf16transformer.h
@@ -22,7 +22,7 @@ class BF16Transformer {
"broadcast", "convert", "BatchToSpace", "DepthToSpace", "ExtractImagePatches", "concat", "power", "lrn",
"permute", "ScatterUpdate", "ScatterElementsUpdate", "ScatterNDUpdate", "depthwise",
"select", "ShuffleChannels", "SpaceToBatch", "SpaceToDepth", "squeeze", "StridedSlice", "unsqueeze", "eltwise",
"ReduceAnd", "ReduceOr", "ReduceMax", "ReduceMin", "psroipooling" };
"ReduceAnd", "ReduceOr", "ReduceMax", "ReduceMin" };

const InferenceEngine::details::caseless_set<std::string> _multiinput =
{ "concat", "eltwise" };
@@ -40,6 +40,13 @@ class BF16Transformer {
*/
bool tryToMarkFP32(InferenceEngine::DataPtr data, const std::set<InferenceEngine::DataPtr> &immutable);

/**
 * A layer that immediately follows a network input cannot consume bf16 on its own, because of the
 * singular position of the input node. We fix this by inserting a Convert layer after the input;
 * the graph optimizer later replaces it with a Reorder.
 */
void insertConvertAfterInput(InferenceEngine::CNNNetwork &network);

public:
/**
* Restores Float point data types on edges which goes to non supported layers
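
Since the psroi.cpp changes below strip the bf16 path (dropping utils/bfloat16.hpp), it may help to recall what a Convert between FP32 and BF16 amounts to: bfloat16 keeps the upper 16 bits of an IEEE-754 float. A minimal truncating sketch; real implementations such as utils/bfloat16.hpp typically round to nearest even, which is omitted here.

#include <cstdint>
#include <cstring>

inline uint16_t f32_to_bf16(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));       // bit-exact reinterpretation
    return static_cast<uint16_t>(bits >> 16);   // keep sign, exponent, top 7 mantissa bits
}

inline float bf16_to_f32(uint16_t h) {
    uint32_t bits = static_cast<uint32_t>(h) << 16;  // low mantissa bits become zero
    float f;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}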
71 changes: 20 additions & 51 deletions inference-engine/src/mkldnn_plugin/nodes/psroi.cpp
@@ -1,4 +1,3 @@

// Copyright (C) 2018-2020 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
@@ -8,11 +7,7 @@
#include <vector>
#include <string>
#include <algorithm>
#include <mkldnn_types.h>
#include "ie_parallel.hpp"
#include "utils/bfloat16.hpp"

using namespace MKLDNNPlugin;

namespace InferenceEngine {
namespace Extensions {
@@ -51,25 +46,22 @@ class PSROIPoolingImpl: public ExtLayerBase {
part_size_ = layer->GetParamAsInt("part_size", 1);
trans_std_ = layer->GetParamAsFloat("trans_std", 1);

bool isBf16Input = layer->insData[0].lock()->getTensorDesc().getPrecision() == Precision::BF16;
if (no_trans_) {
addConfig(layer, {DataConfigurator(ConfLayout::PLN, isBf16Input ? Precision::BF16 : Precision::FP32), DataConfigurator(ConfLayout::PLN)},
{DataConfigurator(ConfLayout::PLN, isBf16Input ? Precision::BF16 : Precision::FP32)});
addConfig(layer, {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)});
} else {
addConfig(layer, {DataConfigurator(ConfLayout::PLN, isBf16Input ? Precision::BF16 : Precision::FP32), DataConfigurator(ConfLayout::PLN),
DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN, isBf16Input ? Precision::BF16 : Precision::FP32)});
addConfig(layer, {DataConfigurator(ConfLayout::PLN), DataConfigurator(ConfLayout::PLN),
DataConfigurator(ConfLayout::PLN)}, {DataConfigurator(ConfLayout::PLN)});
}
} catch (InferenceEngine::details::InferenceEngineException &ex) {
errorMsg = ex.what();
}
}

template <typename inputType, typename outputType>
StatusCode executeSpecified(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
ResponseDesc *resp) {
StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
ResponseDesc *resp) noexcept override {
float* dst_data = outputs[0]->buffer();
const float *bottom_data_beginning = inputs[0]->buffer();
const float *bottom_rois_beginning = inputs[1]->buffer();
const auto *src_data = inputs[0]->cbuffer().as<const inputType*>();
auto *dst_data = outputs[0]->buffer().as<outputType*>();

int real_rois = 0;
for (; real_rois < nn; real_rois++) {
@@ -93,7 +85,7 @@ class PSROIPoolingImpl: public ExtLayerBase {
size_t num_bins = spatial_bins_x_*spatial_bins_y_;

parallel_for(real_rois, [&](int n) {
const float *bottom_rois = bottom_rois_beginning + n * 5;
const float* bottom_rois = bottom_rois_beginning + n * 5;
int roi_batch_ind = static_cast<int>(bottom_rois[0]);
float roi_start_w = 0.0f;
float roi_start_h = 0.0f;
@@ -151,17 +143,17 @@ class PSROIPoolingImpl: public ExtLayerBase {
float bin_area = static_cast<float>((hend - hstart) * (wend - wstart));
if (bin_area) {
int gc = (c * group_size_ + h) * group_size_ + w;
const auto *bottom_data =
src_data + ((roi_batch_ind * channels + gc) * height * width);
const float *bottom_data =
bottom_data_beginning + ((roi_batch_ind * channels + gc) * height * width);

float out_sum = 0.0f;
for (int hh = hstart; hh < hend; ++hh)
for (int ww = wstart; ww < wend; ++ww) {
for (int ww = wstart; ww < wend; ++ww)
out_sum += bottom_data[hh * width + ww];
}

dst_data[index] = out_sum / bin_area;
}
} else if (mode_ == "bilinear") {
float accum = 0.0f;
for (size_t bin_y = 0; bin_y < spatial_bins_y_; bin_y++) {
for (size_t bin_x = 0; bin_x < spatial_bins_x_; bin_x++) {
float box_xmin = roi_start_w + (bin_x + 0) * (roi_width / spatial_bins_x_);
@@ -171,7 +163,7 @@

size_t gc = c + (bin_y*spatial_bins_x_ + bin_x)*nc;
size_t src_idx = (roi_batch_ind * channels + gc) * height * width;
const auto *bottom_data = src_data + src_idx;
const float *bottom_data = bottom_data_beginning + src_idx;

float height_scale = nh > 1 ? (box_ymax - box_ymin) * (height - 1) / (pooled_height_ - 1)
: 0.0f;
@@ -203,12 +195,11 @@
const float top = top_left + (top_right - top_left) * (in_x - left_x_index);
const float bottom = bottom_left + (bottom_right - bottom_left) * (in_x - left_x_index);

accum += top + (bottom - top) * (in_y - top_y_index);
dst_data[index] += top + (bottom - top) * (in_y - top_y_index);
}
}
}
accum /= num_bins;
dst_data[index] = accum;
dst_data[index] /= num_bins;
} else if (mode_ == "bilinear_deformable") {
// Compute w and h at bottom
float bin_size_h = roi_height / static_cast<float>(pooled_height_);
@@ -237,7 +228,7 @@
gw = (std::min)((std::max)(gw, 0), static_cast<int>(group_size_ - 1));
gh = (std::min)((std::max)(gh, 0), static_cast<int>(group_size_ - 1));

const inputType* offset_bottom_data = src_data + (roi_batch_ind * channels) * height * width;
const float* offset_bottom_data = bottom_data_beginning + (roi_batch_ind * channels) * height * width;
for (size_t ih = 0; ih < spatial_bins_y_; ih++) {
for (size_t iw = 0; iw < spatial_bins_x_; iw++) {
float w1 = wstart + iw * sub_bin_size_w;
@@ -248,13 +239,12 @@
w1 = static_cast<float>((std::min)((std::max)(static_cast<double>(w1), 0.0), width - 1.0));
h1 = static_cast<float>((std::min)((std::max)(static_cast<double>(h1), 0.0), height - 1.0));
int c1 = static_cast<int>((c * group_size_ + gh) * group_size_ + gw);
float val = bilinear_interp<inputType>(offset_bottom_data +
c1 * height * width, w1, h1, width);
float val = bilinear_interp(offset_bottom_data + c1 * height * width, w1, h1, width);
sum += val;
count++;
}
}
dst_data[index] = count == 0 ? 0.0f : sum / count;
dst_data[index] = count == 0 ? 0 : sum / count;
}
}
}
@@ -271,34 +261,13 @@
return OK;
}

StatusCode execute(std::vector<Blob::Ptr>& inputs, std::vector<Blob::Ptr>& outputs,
ResponseDesc *resp) noexcept override {
auto inputPrec = inputs[0]->getTensorDesc().getPrecision();
auto outputPrec = outputs[0]->getTensorDesc().getPrecision();
if (inputPrec == Precision::BF16) {
if (outputPrec == Precision::BF16) {
return executeSpecified<bfloat16_t, bfloat16_t>(inputs, outputs, resp);
} else {
return executeSpecified<bfloat16_t, float>(inputs, outputs, resp);
}
} else {
if (outputPrec == Precision::BF16) {
return executeSpecified<float, bfloat16_t>(inputs, outputs, resp);
} else {
return executeSpecified<float, float>(inputs, outputs, resp);
}
}
}

template <typename inputType>
inline float bilinear_interp(const inputType* data, const float x, const float y, const int width) {
inline float bilinear_interp(const float* data, const float x, const float y, const int width) {
int x1 = static_cast<int>(std::floor(x));
int x2 = static_cast<int>(std::ceil(x));
int y1 = static_cast<int>(std::floor(y));
int y2 = static_cast<int>(std::ceil(y));
float dist_x = x - x1;
float dist_y = y - y1;

float value11 = data[y1 * width + x1];
float value12 = data[y2 * width + x1];
float value21 = data[y1 * width + x2];
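
The last hunk is cut off before the final blend. For completeness, a self-contained sketch of the whole routine: everything up to value21 matches the diff, while the value22 load and the closing interpolation are reconstructed from the standard bilinear formula rather than taken from this commit.

#include <cmath>

inline float bilinear_interp(const float* data, const float x, const float y, const int width) {
    int x1 = static_cast<int>(std::floor(x));
    int x2 = static_cast<int>(std::ceil(x));
    int y1 = static_cast<int>(std::floor(y));
    int y2 = static_cast<int>(std::ceil(y));
    float dist_x = x - x1;
    float dist_y = y - y1;

    float value11 = data[y1 * width + x1];  // top-left
    float value12 = data[y2 * width + x1];  // bottom-left
    float value21 = data[y1 * width + x2];  // top-right
    float value22 = data[y2 * width + x2];  // bottom-right (assumed)
    // interpolate along x on both rows, then blend along y
    float top = value11 + dist_x * (value21 - value11);
    float bottom = value12 + dist_x * (value22 - value12);
    return top + dist_y * (bottom - top);
}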
