From 0f4c7332969bdb057f855cd4a37174f3c06de281 Mon Sep 17 00:00:00 2001 From: guosheng Date: Thu, 20 Jul 2017 12:03:23 +0800 Subject: [PATCH 1/7] add ROIPooling for Fast(er) R-CNN --- paddle/gserver/layers/ROIPoolLayer.cpp | 154 ++++++++++++++++++ paddle/gserver/layers/ROIPoolLayer.h | 53 ++++++ paddle/gserver/tests/test_LayerGrad.cpp | 34 ++++ proto/ModelConfig.proto | 9 + python/paddle/trainer/config_parser.py | 11 ++ .../paddle/trainer_config_helpers/layers.py | 37 +++++ 6 files changed, 298 insertions(+) create mode 100644 paddle/gserver/layers/ROIPoolLayer.cpp create mode 100644 paddle/gserver/layers/ROIPoolLayer.h diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp new file mode 100644 index 0000000000000..04763fd152f2d --- /dev/null +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -0,0 +1,154 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "ROIPoolLayer.h" + +namespace paddle { + +REGISTER_LAYER(roi_pool, ROIPoolLayer); + +bool ROIPoolLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + Layer::init(layerMap, parameterMap); + + const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf(); + pooledWidth_ = layerConf.pooled_width(); + pooledHeight_ = layerConf.pooled_height(); + spatialScale_ = layerConf.spatial_scale(); + + return true; +} + +void ROIPoolLayer::forward(PassType passType) { + Layer::forward(passType); + + const ROIPoolConfig& layerConf = config_.inputs(0).roi_pool_conf(); + height_ = getInput(0).getFrameHeight(); + if (!height_) height_ = layerConf.height(); + width_ = getInput(0).getFrameWidth(); + if (!width_) width_ = layerConf.width(); + channels_ = getInputValue(0)->getWidth() / width_ / height_; + + size_t batchSize = getInput(0).getBatchSize(); + size_t numROIs = getInput(1).getBatchSize(); + + real* bottomData = getInputValue(0)->getData(); + size_t batchOffset = getInputValue(0)->getWidth(); + size_t channelOffset = height_ * width_; + real* bottomROIs = getInputValue(1)->getData(); + size_t roiOffset = getInputValue(1)->getWidth(); + size_t poolChannelOffset = pooledHeight_ * pooledWidth_; + + resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); + real* outputData = getOutputValue()->getData(); + Matrix::resizeOrCreate(maxIdxs_, + numROIs, + channels_ * pooledHeight_ * pooledWidth_, + false, + false); + real* argmaxData = maxIdxs_->getData(); + + size_t uZero = 0; + size_t uOne = 1; + + for (size_t n = 0; n < numROIs; ++n) { + size_t roiBatchIdx = bottomROIs[0]; + size_t roiStartW = std::round(bottomROIs[1] * spatialScale_); + size_t roiStartH = std::round(bottomROIs[2] * spatialScale_); + size_t roiEndW = std::round(bottomROIs[3] * spatialScale_); + size_t roiEndH = std::round(bottomROIs[4] * spatialScale_); + CHECK_GE(roiBatchIdx, 0); + CHECK_LT(roiBatchIdx, batchSize); + size_t roiHeight = std::max(roiEndH - roiStartH + 1, uOne); + size_t roiWidth = std::max(roiEndW - roiStartW + 1, uOne); + real binSizeH = + static_cast(roiHeight) / static_cast(pooledHeight_); + real binSizeW = + static_cast(roiWidth) / static_cast(pooledWidth_); + real* batchData = bottomData + batchOffset * roiBatchIdx; + for (size_t c = 0; c < channels_; ++c) { + for (size_t ph = 0; ph < pooledHeight_; ++ph) { + for (size_t pw = 0; pw < pooledWidth_; ++pw) { + size_t hstart = static_cast(std::floor(ph * binSizeH)); + size_t wstart = static_cast(std::floor(pw * binSizeW)); + size_t hend = static_cast(std::ceil((ph + 1) * binSizeH)); + size_t wend = static_cast(std::ceil((pw + 1) * binSizeW)); + hstart = std::min(std::max(hstart + roiStartH, uZero), height_); + wstart = std::min(std::max(wstart + roiStartW, uZero), width_); + hend = std::min(std::max(hend + roiStartH, uZero), height_); + wend = std::min(std::max(wend + roiStartW, uZero), width_); + + bool isEmpty = (hend <= hstart) || (wend <= wstart); + size_t poolIndex = ph * pooledWidth_ + pw; + if (isEmpty) { + outputData[poolIndex] = 0; + argmaxData[poolIndex] = -1; + } + + for (size_t h = hstart; h < hend; ++h) { + for (size_t w = wstart; w < wend; ++w) { + size_t index = h * width_ + w; + if (batchData[index] > outputData[poolIndex]) { + outputData[poolIndex] = batchData[index]; + argmaxData[poolIndex] = index; + } + } + } + } + } + batchData += channelOffset; + outputData += poolChannelOffset; + argmaxData += poolChannelOffset; + } + bottomROIs += roiOffset; + } +} + +void ROIPoolLayer::backward(const UpdateCallback& callback) { + real* bottomROIs = getInputValue(1)->getData(); + size_t numROIs = getInput(1).getBatchSize(); + size_t roiOffset = getInputValue(1)->getWidth(); + + MatrixPtr inGrad = getInputGrad(0); + real* inDiffData = inGrad->getData(); + size_t batchOffset = getInputValue(0)->getWidth(); + size_t channelOffset = height_ * width_; + + MatrixPtr outGrad = getOutputGrad(); + real* outDiffData = outGrad->getData(); + size_t poolChannelOffset = pooledHeight_ * pooledWidth_; + real* argmaxData = maxIdxs_->getData(); + + for (size_t n = 0; n < numROIs; ++n) { + size_t roiBatchIdx = bottomROIs[0]; + real* batchDiffData = inDiffData + batchOffset * roiBatchIdx; + for (size_t c = 0; c < channels_; ++c) { + for (size_t ph = 0; ph < pooledHeight_; ++ph) { + for (size_t pw = 0; pw < pooledWidth_; ++pw) { + size_t poolIndex = ph * pooledWidth_ + pw; + if (argmaxData[poolIndex] > 0) { + size_t index = static_cast(argmaxData[poolIndex]); + batchDiffData[index] += outDiffData[poolIndex]; + } + } + } + batchDiffData += channelOffset; + outDiffData += poolChannelOffset; + argmaxData += poolChannelOffset; + } + bottomROIs += roiOffset; + } +} + +} // namespace paddle diff --git a/paddle/gserver/layers/ROIPoolLayer.h b/paddle/gserver/layers/ROIPoolLayer.h new file mode 100644 index 0000000000000..ca412d2845fa8 --- /dev/null +++ b/paddle/gserver/layers/ROIPoolLayer.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "Layer.h" + +namespace paddle { + +/** + * A layer used by Fast R-CNN to extract feature maps of ROIs from the last + * feature map. + * - Input: This layer needs two input layers: The first input layer is a + * convolution layer; The second input layer contains the ROI data which is the + * output of ProposalLayer in Faster R-CNN. layers for generating bbox + * location offset and the classification confidence. - Output: The + * ROIs' feature map. Reference: Shaoqing Ren, Kaiming He, Ross Girshick, and + * Jian Sun. Faster R-CNN: Towards Real-Time Object Detection with Region + * Proposal + */ + +class ROIPoolLayer : public Layer { +protected: + size_t channels_; + size_t width_; + size_t height_; + size_t pooledWidth_; + size_t pooledHeight_; + real spatialScale_; + + MatrixPtr maxIdxs_; + +public: + explicit ROIPoolLayer(const LayerConfig& config) : Layer(config) {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + void backward(const UpdateCallback& callback = nullptr) override; +}; +} // namespace paddle diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 9af083468c0f0..77feb6d4c9efe 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1830,6 +1830,40 @@ TEST(Layer, CropLayer) { } } +TEST(Layer, roi_pool) { + TestConfig config; + config.layerConfig.set_type("roi_pool"); + config.biasSize = 0; + LayerInputConfig* input = config.layerConfig.add_inputs(); + ROIPoolConfig* roiPoolConf = input->mutable_roi_pool_conf(); + roiPoolConf->set_pooled_width(7); + roiPoolConf->set_pooled_height(7); + roiPoolConf->set_spatial_scale(1. / 16); + roiPoolConf->set_width(14); + roiPoolConf->set_height(14); + + MatrixPtr roiValue = Matrix::create(10, 10, false, false); + roiValue->zeroMem(); + real* roiData = roiValue->getData(); + for (size_t i = 0; i < roiValue->getElementCnt() / 5; ++i) { + *roiData++ = std::rand() % 2; + *roiData++ = std::rand() % 224; + *roiData++ = std::rand() % 224; + size_t xMin = static_cast(*(roiData - 2)); + size_t yMin = static_cast(*(roiData - 1)); + *roiData++ = xMin + std::rand() % (224 - xMin); + *roiData++ = yMin + std::rand() % (224 - yMin); + } + + config.inputDefs.push_back({INPUT_DATA, "input", 3 * 14 * 14, {}}); + config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA, "rois", roiValue, {}}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "roi_pool", 5, false, useGpu, false); + } +} + int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); initMain(argc, argv); diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 83f72c137bdf5..275723272b3ff 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -289,6 +289,14 @@ message DetectionOutputConfig { optional uint32 width = 9 [default = 1]; } +message ROIPoolConfig { + required uint32 pooled_width = 1; + required uint32 pooled_height = 2; + required float spatial_scale = 3; + optional uint32 height = 4 [default = 1]; + optional uint32 width = 5 [default = 1]; +} + message LayerInputConfig { required string input_layer_name = 1; optional string input_parameter_name = 2; @@ -309,6 +317,7 @@ message LayerInputConfig { optional RowConvConfig row_conv_conf = 15; optional MultiBoxLossConfig multibox_loss_conf = 16; optional DetectionOutputConfig detection_output_conf = 17; + optional ROIPoolConfig roi_pool_conf = 18; } message LayerConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index ab81e67579e39..bfb9dd7f1d4c3 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1732,6 +1732,17 @@ def __init__(self, name, inputs, size, input_num, num_classes, self.config.size = size +@config_layer('roi_pool') +class ROIPoolLayer(LayerBase): + def __init__(self, name, inputs, pooled_width, pooled_height, + spatial_scale): + super(ROIPoolLayer, self).__init__(name, 'roi_pool', 0, inputs) + config_assert(len(inputs) == 2, 'ROIPoolLayer must have 2 inputs') + self.config.inputs[0].roi_pool_conf.pooled_width = pooled_width + self.config.inputs[0].roi_pool_conf.pooled_height = pooled_height + self.config.inputs[0].roi_pool_conf.spatial_scale = spatial_scale + + @config_layer('data') class DataLayer(LayerBase): def __init__(self, name, size, height=None, width=None, device=None): diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index fdb6f83f2ba51..c1bdeb680888f 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -117,6 +117,7 @@ 'cross_channel_norm_layer', 'multibox_loss_layer', 'detection_output_layer', + 'roi_pool_layer', 'spp_layer', 'pad_layer', 'eos_layer', @@ -201,6 +202,7 @@ class LayerType(object): PRIORBOX_LAYER = 'priorbox' MULTIBOX_LOSS_LAYER = 'multibox_loss' DETECTION_OUTPUT_LAYER = 'detection_output' + ROI_POOL_LAYER = 'roi_pool' CTC_LAYER = 'ctc' WARP_CTC_LAYER = 'warp_ctc' @@ -1200,6 +1202,41 @@ def detection_output_layer(input_loc, name, LayerType.DETECTION_OUTPUT_LAYER, parents=parents, size=size) +@wrap_name_default("roi_pool") +def roi_pool_layer(input, + rois, + pooled_width, + pooled_height, + spatial_scale, + name=None): + """ + A layer used by Fast R-CNN to extract feature maps of ROIs from the last + feature map. + + :param name: The Layer Name. + :type name: basestring + :param input: The input layer. + :type input: LayerOutput. + :param rois: The input ROIs' data. + :type rois: LayerOutput. + :param pooled_width: The width after pooling. + :type pooled_width: int + :param pooled_height: The height after pooling. + :type pooled_height: int + :param spatial_scale: The spatial scale between the image and feature map. + :type spatial_scale: float + :return: LayerOutput + """ + Layer( + name=name, + type=LayerType.ROI_POOL_LAYER, + inputs=[input.name, rois.name], + pooled_width=pooled_width, + pooled_height=pooled_height, + spatial_scale=spatial_scale) + return LayerOutput(name, LayerType.ROI_POOL_LAYER, parents=[input, rois]) + + @wrap_name_default("cross_channel_norm") def cross_channel_norm_layer(input, name=None, param_attr=None): """ From d5384e640f1f972e9685e51cf018d0ff478c4362 Mon Sep 17 00:00:00 2001 From: guosheng Date: Thu, 20 Jul 2017 13:12:10 +0800 Subject: [PATCH 2/7] refine layer gradient test of ROIPoolLayer --- paddle/gserver/tests/test_LayerGrad.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index 77feb6d4c9efe..b6282b472fd54 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -1842,17 +1842,20 @@ TEST(Layer, roi_pool) { roiPoolConf->set_width(14); roiPoolConf->set_height(14); - MatrixPtr roiValue = Matrix::create(10, 10, false, false); + const size_t roiNum = 10; + const size_t roiDim = 10; + const size_t batchSize = 5; + MatrixPtr roiValue = Matrix::create(roiNum, roiDim, false, false); roiValue->zeroMem(); real* roiData = roiValue->getData(); - for (size_t i = 0; i < roiValue->getElementCnt() / 5; ++i) { - *roiData++ = std::rand() % 2; - *roiData++ = std::rand() % 224; - *roiData++ = std::rand() % 224; - size_t xMin = static_cast(*(roiData - 2)); - size_t yMin = static_cast(*(roiData - 1)); - *roiData++ = xMin + std::rand() % (224 - xMin); - *roiData++ = yMin + std::rand() % (224 - yMin); + for (size_t i = 0; i < roiNum; ++i) { + roiData[i * roiDim + 0] = std::rand() % batchSize; + roiData[i * roiDim + 1] = std::rand() % 224; // xMin + roiData[i * roiDim + 2] = std::rand() % 224; // yMin + size_t xMin = static_cast(roiData[i * roiDim + 1]); + size_t yMin = static_cast(roiData[i * roiDim + 2]); + roiData[i * roiDim + 3] = xMin + std::rand() % (224 - xMin); // xMax + roiData[i * roiDim + 4] = yMin + std::rand() % (224 - yMin); // yMax } config.inputDefs.push_back({INPUT_DATA, "input", 3 * 14 * 14, {}}); @@ -1860,7 +1863,7 @@ TEST(Layer, roi_pool) { config.layerConfig.add_inputs(); for (auto useGpu : {false, true}) { - testLayerGrad(config, "roi_pool", 5, false, useGpu, false); + testLayerGrad(config, "roi_pool", batchSize, false, useGpu, false); } } From 1c00767731e2cf6d16abfd7b3c5002015fe5fd27 Mon Sep 17 00:00:00 2001 From: guosheng Date: Thu, 20 Jul 2017 15:21:45 +0800 Subject: [PATCH 3/7] fix ci bug on andriod building --- paddle/gserver/layers/ROIPoolLayer.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 04763fd152f2d..34ba9030f709b 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -64,10 +64,10 @@ void ROIPoolLayer::forward(PassType passType) { for (size_t n = 0; n < numROIs; ++n) { size_t roiBatchIdx = bottomROIs[0]; - size_t roiStartW = std::round(bottomROIs[1] * spatialScale_); - size_t roiStartH = std::round(bottomROIs[2] * spatialScale_); - size_t roiEndW = std::round(bottomROIs[3] * spatialScale_); - size_t roiEndH = std::round(bottomROIs[4] * spatialScale_); + size_t roiStartW = round(bottomROIs[1] * spatialScale_); + size_t roiStartH = round(bottomROIs[2] * spatialScale_); + size_t roiEndW = round(bottomROIs[3] * spatialScale_); + size_t roiEndH = round(bottomROIs[4] * spatialScale_); CHECK_GE(roiBatchIdx, 0); CHECK_LT(roiBatchIdx, batchSize); size_t roiHeight = std::max(roiEndH - roiStartH + 1, uOne); From 687b3749b4a4217c7f5d8b7e85c7b0c922cc4f6c Mon Sep 17 00:00:00 2001 From: guosheng Date: Sat, 22 Jul 2017 13:57:21 +0800 Subject: [PATCH 4/7] fix bug on GPU test --- paddle/gserver/layers/ROIPoolLayer.cpp | 89 ++++++++++++++++++++++---- 1 file changed, 78 insertions(+), 11 deletions(-) diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 34ba9030f709b..3d26286376e41 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -43,15 +43,46 @@ void ROIPoolLayer::forward(PassType passType) { size_t batchSize = getInput(0).getBatchSize(); size_t numROIs = getInput(1).getBatchSize(); - real* bottomData = getInputValue(0)->getData(); - size_t batchOffset = getInputValue(0)->getWidth(); + MatrixPtr dataValue = getInputValue(0); + MatrixPtr roiValue = getInputValue(1); + resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); + MatrixPtr outputValue = getOutputValue(); + + if (useGpu_) { + MatrixPtr dataCpuBuffer; + Matrix::resizeOrCreate(dataCpuBuffer, + dataValue->getHeight(), + dataValue->getWidth(), + false, + false); + MatrixPtr roiCpuBuffer; + Matrix::resizeOrCreate(roiCpuBuffer, + roiValue->getHeight(), + roiValue->getWidth(), + false, + false); + dataCpuBuffer->copyFrom(*dataValue); + roiCpuBuffer->copyFrom(*roiValue); + dataValue = dataCpuBuffer; + roiValue = roiCpuBuffer; + MatrixPtr outputCpuBuffer; + Matrix::resizeOrCreate(outputCpuBuffer, + outputValue->getHeight(), + outputValue->getWidth(), + false, + false); + outputCpuBuffer->copyFrom(*outputValue); + outputValue = outputCpuBuffer; + } + + real* bottomData = dataValue->getData(); + size_t batchOffset = dataValue->getWidth(); size_t channelOffset = height_ * width_; - real* bottomROIs = getInputValue(1)->getData(); - size_t roiOffset = getInputValue(1)->getWidth(); + real* bottomROIs = roiValue->getData(); + size_t roiOffset = roiValue->getWidth(); size_t poolChannelOffset = pooledHeight_ * pooledWidth_; - resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); - real* outputData = getOutputValue()->getData(); + real* outputData = outputValue->getData(); Matrix::resizeOrCreate(maxIdxs_, numROIs, channels_ * pooledHeight_ * pooledWidth_, @@ -113,20 +144,52 @@ void ROIPoolLayer::forward(PassType passType) { } bottomROIs += roiOffset; } + if (useGpu_) { + getOutputValue()->copyFrom(*outputValue); + } } void ROIPoolLayer::backward(const UpdateCallback& callback) { - real* bottomROIs = getInputValue(1)->getData(); + MatrixPtr inGradValue = getInputGrad(0); + MatrixPtr outGradValue = getOutputGrad(); + MatrixPtr roiValue = getInputValue(1); + + if (useGpu_) { + MatrixPtr inGradCpuBuffer; + Matrix::resizeOrCreate(inGradCpuBuffer, + inGradValue->getHeight(), + inGradValue->getWidth(), + false, + false); + MatrixPtr outGradCpuBuffer; + Matrix::resizeOrCreate(outGradCpuBuffer, + outGradValue->getHeight(), + outGradValue->getWidth(), + false, + false); + MatrixPtr roiCpuBuffer; + Matrix::resizeOrCreate(roiCpuBuffer, + roiValue->getHeight(), + roiValue->getWidth(), + false, + false); + inGradCpuBuffer->copyFrom(*inGradValue); + outGradCpuBuffer->copyFrom(*outGradValue); + roiCpuBuffer->copyFrom(*roiValue); + inGradValue = inGradCpuBuffer; + outGradValue = outGradCpuBuffer; + roiValue = roiCpuBuffer; + } + + real* bottomROIs = roiValue->getData(); size_t numROIs = getInput(1).getBatchSize(); size_t roiOffset = getInputValue(1)->getWidth(); - MatrixPtr inGrad = getInputGrad(0); - real* inDiffData = inGrad->getData(); + real* inDiffData = inGradValue->getData(); size_t batchOffset = getInputValue(0)->getWidth(); size_t channelOffset = height_ * width_; - MatrixPtr outGrad = getOutputGrad(); - real* outDiffData = outGrad->getData(); + real* outDiffData = outGradValue->getData(); size_t poolChannelOffset = pooledHeight_ * pooledWidth_; real* argmaxData = maxIdxs_->getData(); @@ -149,6 +212,10 @@ void ROIPoolLayer::backward(const UpdateCallback& callback) { } bottomROIs += roiOffset; } + + if (useGpu_) { + getInputGrad(0)->copyFrom(*inGradValue); + } } } // namespace paddle From 3cf01b5d52616e1605d3d089ceb798bb16ab8f80 Mon Sep 17 00:00:00 2001 From: guosheng Date: Wed, 16 Aug 2017 17:19:02 +0800 Subject: [PATCH 5/7] refine ROIPoolLayer --- doc/api/v2/config/layer.rst | 5 +++ paddle/gserver/layers/ROIPoolLayer.cpp | 17 +++---- paddle/gserver/layers/ROIPoolLayer.h | 1 + .../paddle/trainer_config_helpers/layers.py | 10 ++++- .../tests/configs/file_list.sh | 2 +- .../protostr/test_roi_pool_layer.protostr | 45 +++++++++++++++++++ .../tests/configs/test_roi_pool_layer.py | 14 ++++++ 7 files changed, 82 insertions(+), 12 deletions(-) create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr create mode 100644 python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index cb330ea5e1b91..3b2ee37628da6 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -82,6 +82,11 @@ maxout .. autoclass:: paddle.v2.layer.maxout :noindex: +roi_pool +-------- +.. autoclass:: paddle.v2.layer.roi_pool + :noindex: + Norm Layer ========== diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 3d26286376e41..131fd7e52bbed 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -48,7 +48,7 @@ void ROIPoolLayer::forward(PassType passType) { resetOutput(numROIs, channels_ * pooledHeight_ * pooledWidth_); MatrixPtr outputValue = getOutputValue(); - if (useGpu_) { + if (useGpu_) { // TODO(guosheng): implement on GPU later MatrixPtr dataCpuBuffer; Matrix::resizeOrCreate(dataCpuBuffer, dataValue->getHeight(), @@ -90,9 +90,6 @@ void ROIPoolLayer::forward(PassType passType) { false); real* argmaxData = maxIdxs_->getData(); - size_t uZero = 0; - size_t uOne = 1; - for (size_t n = 0; n < numROIs; ++n) { size_t roiBatchIdx = bottomROIs[0]; size_t roiStartW = round(bottomROIs[1] * spatialScale_); @@ -101,8 +98,8 @@ void ROIPoolLayer::forward(PassType passType) { size_t roiEndH = round(bottomROIs[4] * spatialScale_); CHECK_GE(roiBatchIdx, 0); CHECK_LT(roiBatchIdx, batchSize); - size_t roiHeight = std::max(roiEndH - roiStartH + 1, uOne); - size_t roiWidth = std::max(roiEndW - roiStartW + 1, uOne); + size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL); + size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL); real binSizeH = static_cast(roiHeight) / static_cast(pooledHeight_); real binSizeW = @@ -115,10 +112,10 @@ void ROIPoolLayer::forward(PassType passType) { size_t wstart = static_cast(std::floor(pw * binSizeW)); size_t hend = static_cast(std::ceil((ph + 1) * binSizeH)); size_t wend = static_cast(std::ceil((pw + 1) * binSizeW)); - hstart = std::min(std::max(hstart + roiStartH, uZero), height_); - wstart = std::min(std::max(wstart + roiStartW, uZero), width_); - hend = std::min(std::max(hend + roiStartH, uZero), height_); - wend = std::min(std::max(wend + roiStartW, uZero), width_); + hstart = std::min(std::max(hstart + roiStartH, 0UL), height_); + wstart = std::min(std::max(wstart + roiStartW, 0UL), width_); + hend = std::min(std::max(hend + roiStartH, 0UL), height_); + wend = std::min(std::max(wend + roiStartW, 0UL), width_); bool isEmpty = (hend <= hstart) || (wend <= wstart); size_t poolIndex = ph * pooledWidth_ + pw; diff --git a/paddle/gserver/layers/ROIPoolLayer.h b/paddle/gserver/layers/ROIPoolLayer.h index d04362f0d481a..796467a5c86e6 100644 --- a/paddle/gserver/layers/ROIPoolLayer.h +++ b/paddle/gserver/layers/ROIPoolLayer.h @@ -29,6 +29,7 @@ namespace paddle { * Reference: * Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. * Faster R-CNN: Towards Real-Time Object Detection with Region Proposal + * Networks */ class ROIPoolLayer : public Layer { diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 590097b96b3bd..6703db5f0b50e 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1257,6 +1257,7 @@ def roi_pool_layer(input, pooled_width, pooled_height, spatial_scale, + num_channels=None, name=None): """ A layer used by Fast R-CNN to extract feature maps of ROIs from the last @@ -1274,8 +1275,14 @@ def roi_pool_layer(input, :type pooled_height: int :param spatial_scale: The spatial scale between the image and feature map. :type spatial_scale: float + :param num_channels: number of input channel. + :type num_channels: int :return: LayerOutput """ + if num_channels is None: + assert input.num_filters is not None + num_channels = input.num_filters + size = num_channels * pooled_width * pooled_height Layer( name=name, type=LayerType.ROI_POOL_LAYER, @@ -1283,7 +1290,8 @@ def roi_pool_layer(input, pooled_width=pooled_width, pooled_height=pooled_height, spatial_scale=spatial_scale) - return LayerOutput(name, LayerType.ROI_POOL_LAYER, parents=[input, rois]) + return LayerOutput( + name, LayerType.ROI_POOL_LAYER, parents=[input, rois], size=size) @wrap_name_default("cross_channel_norm") diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index a61beb871ad06..58e36eb333aa8 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer -test_kmax_seq_socre_layer test_seq_select_layers) +test_kmax_seq_socre_layer test_seq_select_layers test_roi_pool_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr new file mode 100644 index 0000000000000..e8c379b17b0f2 --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr @@ -0,0 +1,45 @@ +type: "nn" +layers { + name: "data" + type: "data" + size: 588 + active_type: "" + height: 14 + width: 14 +} +layers { + name: "rois" + type: "data" + size: 10 + active_type: "" +} +layers { + name: "__roi_pool_0__" + type: "roi_pool" + active_type: "" + inputs { + input_layer_name: "data" + roi_pool_conf { + pooled_width: 7 + pooled_height: 7 + spatial_scale: 0.0625 + } + } + inputs { + input_layer_name: "rois" + } +} +input_layer_names: "data" +input_layer_names: "rois" +output_layer_names: "__roi_pool_0__" +sub_models { + name: "root" + layer_names: "data" + layer_names: "rois" + layer_names: "__roi_pool_0__" + input_layer_names: "data" + input_layer_names: "rois" + output_layer_names: "__roi_pool_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py new file mode 100644 index 0000000000000..0d6ca9f1bbade --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py @@ -0,0 +1,14 @@ +from paddle.trainer_config_helpers import * + +data = data_layer(name='data', size=3 * 14 * 14, height=14, width=14) + +rois = data_layer(name='rois', size=10) + +roi_pool = roi_pool_layer( + input=data, + rois=rois, + pooled_width=7, + pooled_height=7, + spatial_scale=1. / 16) + +outputs(roi_pool) From 7829034da441ab3eddbc111c19ff433f8f843e0a Mon Sep 17 00:00:00 2001 From: guosheng Date: Fri, 10 Nov 2017 18:40:55 +0800 Subject: [PATCH 6/7] Refine ROIPoolLayer by following comments --- paddle/gserver/layers/ROIPoolLayer.cpp | 2 ++ paddle/gserver/layers/ROIPoolLayer.h | 1 + python/paddle/trainer/config_parser.py | 5 +++-- python/paddle/trainer_config_helpers/layers.py | 3 ++- 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/gserver/layers/ROIPoolLayer.cpp index 131fd7e52bbed..99cfddb0cf333 100644 --- a/paddle/gserver/layers/ROIPoolLayer.cpp +++ b/paddle/gserver/layers/ROIPoolLayer.cpp @@ -91,6 +91,8 @@ void ROIPoolLayer::forward(PassType passType) { real* argmaxData = maxIdxs_->getData(); for (size_t n = 0; n < numROIs; ++n) { + // the first five elememts of each RoI should be: + // batch_idx, roi_x_start, roi_y_start, roi_x_end, roi_y_end size_t roiBatchIdx = bottomROIs[0]; size_t roiStartW = round(bottomROIs[1] * spatialScale_); size_t roiStartH = round(bottomROIs[2] * spatialScale_); diff --git a/paddle/gserver/layers/ROIPoolLayer.h b/paddle/gserver/layers/ROIPoolLayer.h index 796467a5c86e6..4f07e49d6fd1e 100644 --- a/paddle/gserver/layers/ROIPoolLayer.h +++ b/paddle/gserver/layers/ROIPoolLayer.h @@ -41,6 +41,7 @@ class ROIPoolLayer : public Layer { size_t pooledHeight_; real spatialScale_; + // Since there is no int matrix, use real maxtrix instead. MatrixPtr maxIdxs_; public: diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index f31252882e516..43d02bf70e74c 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1971,13 +1971,14 @@ def __init__(self, name, inputs, size, input_num, num_classes, @config_layer('roi_pool') class ROIPoolLayer(LayerBase): - def __init__(self, name, inputs, pooled_width, pooled_height, - spatial_scale): + def __init__(self, name, inputs, pooled_width, pooled_height, spatial_scale, + num_channels, **xargs): super(ROIPoolLayer, self).__init__(name, 'roi_pool', 0, inputs) config_assert(len(inputs) == 2, 'ROIPoolLayer must have 2 inputs') self.config.inputs[0].roi_pool_conf.pooled_width = pooled_width self.config.inputs[0].roi_pool_conf.pooled_height = pooled_height self.config.inputs[0].roi_pool_conf.spatial_scale = spatial_scale + self.set_cnn_layer(name, pooled_height, pooled_width, num_channels) @config_layer('data') diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 623ca047cd913..617fbff948bf0 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -1345,7 +1345,8 @@ def roi_pool_layer(input, inputs=[input.name, rois.name], pooled_width=pooled_width, pooled_height=pooled_height, - spatial_scale=spatial_scale) + spatial_scale=spatial_scale, + num_channels=num_channels) return LayerOutput( name, LayerType.ROI_POOL_LAYER, parents=[input, rois], size=size) From 79e0a26a6472a047ff5b3ebaedc3da6c6eeb6d2a Mon Sep 17 00:00:00 2001 From: guosheng Date: Fri, 10 Nov 2017 20:03:36 +0800 Subject: [PATCH 7/7] Fix test_roi_pool_layer.py --- .../protostr/test_roi_pool_layer.protostr | 55 ++++++++++++++++++- .../tests/configs/test_roi_pool_layer.py | 11 +++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr index e8c379b17b0f2..f1bc65b3aee74 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_roi_pool_layer.protostr @@ -13,12 +13,44 @@ layers { size: 10 active_type: "" } +layers { + name: "__conv_0__" + type: "exconv" + size: 3136 + active_type: "" + inputs { + input_layer_name: "data" + input_parameter_name: "___conv_0__.w0" + conv_conf { + filter_size: 3 + channels: 3 + stride: 1 + padding: 1 + groups: 1 + filter_channels: 3 + output_x: 14 + img_size: 14 + caffe_mode: true + filter_size_y: 3 + padding_y: 1 + stride_y: 1 + output_y: 14 + img_size_y: 14 + } + } + bias_parameter_name: "___conv_0__.wbias" + num_filters: 16 + shared_biases: true + height: 14 + width: 14 +} layers { name: "__roi_pool_0__" type: "roi_pool" + size: 784 active_type: "" inputs { - input_layer_name: "data" + input_layer_name: "__conv_0__" roi_pool_conf { pooled_width: 7 pooled_height: 7 @@ -28,6 +60,26 @@ layers { inputs { input_layer_name: "rois" } + height: 7 + width: 7 +} +parameters { + name: "___conv_0__.w0" + size: 432 + initial_mean: 0.0 + initial_std: 0.272165526976 + initial_strategy: 0 + initial_smart: false +} +parameters { + name: "___conv_0__.wbias" + size: 16 + initial_mean: 0.0 + initial_std: 0.0 + dims: 16 + dims: 1 + initial_strategy: 0 + initial_smart: false } input_layer_names: "data" input_layer_names: "rois" @@ -36,6 +88,7 @@ sub_models { name: "root" layer_names: "data" layer_names: "rois" + layer_names: "__conv_0__" layer_names: "__roi_pool_0__" input_layer_names: "data" input_layer_names: "rois" diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py index 0d6ca9f1bbade..b739a81b8505c 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_roi_pool_layer.py @@ -4,8 +4,17 @@ rois = data_layer(name='rois', size=10) -roi_pool = roi_pool_layer( +conv = img_conv_layer( input=data, + filter_size=3, + num_channels=3, + num_filters=16, + padding=1, + act=LinearActivation(), + bias_attr=True) + +roi_pool = roi_pool_layer( + input=conv, rois=rois, pooled_width=7, pooled_height=7,