Merge pull request #3560 from guoshengCS/add-ShiftLayer
Add ScaleShiftLayer
guoshengCS authored Aug 21, 2017
2 parents 9871c6d + f6dc56a commit b7a6cc9
Showing 8 changed files with 265 additions and 1 deletion.
5 changes: 5 additions & 0 deletions doc/api/v2/config/layer.rst
@@ -362,6 +362,11 @@ trans
.. autoclass:: paddle.v2.layer.trans
    :noindex:

scale_shift
-----------
.. autoclass:: paddle.v2.layer.scale_shift
    :noindex:

Sampling Layers
===============

107 changes: 107 additions & 0 deletions paddle/gserver/layers/ScaleShiftLayer.cpp
@@ -0,0 +1,107 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "Layer.h"

namespace paddle {

/**
 * A layer that applies a linear transformation to each element in each row
 * of the input matrix. For each element, the layer first re-scales it and
 * then adds a bias to it.
 *
 * \f[
 *    y = wx + b
 * \f]
 *
 * Here, w is the scale and b is the bias. Both w and b are trainable scalars.
 *
 */

class ScaleShiftLayer : public Layer {
protected:
  std::unique_ptr<Weight> scale_;
  std::unique_ptr<Weight> offset_;

public:
  explicit ScaleShiftLayer(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback = nullptr) override;
};

REGISTER_LAYER(scale_shift, ScaleShiftLayer);

bool ScaleShiftLayer::init(const LayerMap& layerMap,
                           const ParameterMap& parameterMap) {
  Layer::init(layerMap, parameterMap);
  CHECK_EQ(inputLayers_.size(), 1U);
  scale_.reset(new Weight(1, 1, parameters_[0]));
  if (biasParameter_.get() != NULL) {
    offset_ = std::unique_ptr<Weight>(new Weight(1, 1, biasParameter_));
  }
  return true;
}

void ScaleShiftLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr inV = getInputValue(0);
  resetOutput(inV->getHeight(), inV->getWidth());
  MatrixPtr outV = getOutputValue();
  real scaleValue = scale_->getW()->getElement(0, 0);
  outV->mulScalar(*inV, scaleValue);
  if (offset_) {
    real offsetValue = offset_->getW()->getElement(0, 0);
    outV->add(offsetValue);
  }
}

void ScaleShiftLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inV = getInputValue(0);
  MatrixPtr inG = getInputGrad(0);
  MatrixPtr outV = getOutputValue();
  MatrixPtr outG = getOutputGrad();

  /* Calculate the parameter gradient for the current layer */
  if (scale_->getWGrad()) {
    MatrixPtr rowSumMtx;
    Matrix::resizeOrCreate(rowSumMtx, outG->getHeight(), 1, false, useGpu_);
    // this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} * c_{ij}
    rowSumMtx->sumOfProducts(
        /* b= */ *inV, /* c= */ *outG, /* scaleSum= */ 1, /* scaleDest= */ 0.);
    // this_i = scaleDest * this_i + scaleSum * \sum_j b_{ji}
    scale_->getWGrad()->sumCols(
        /* b= */ *rowSumMtx, /* scaleSum= */ 1., /* scaleDest= */ 1.);
    scale_->getParameterPtr()->incUpdate(callback);
  }
  if (offset_ && offset_->getWGrad()) {
    MatrixPtr rowSumMtx;
    Matrix::resizeOrCreate(rowSumMtx, outG->getHeight(), 1, false, useGpu_);
    rowSumMtx->sumRows(*outG, 1., 0.);
    offset_->getWGrad()->sumCols(*rowSumMtx, 1., 1.);
    offset_->getParameterPtr()->incUpdate(callback);
  }

  /* Calculate the input layer's error */
  if (inG) {
    real scaleValue = scale_->getW()->getElement(0, 0);
    inG->add(*outG, scaleValue);
  }
}

} // namespace paddle
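For reference, the math implemented by the forward and backward passes above can be sketched in NumPy as follows (a minimal illustration with made-up names, not part of the commit):

import numpy as np

def scale_shift_forward(x, w, b):
    # y_ij = w * x_ij + b, applied elementwise to the input matrix
    return w * x + b

def scale_shift_backward(x, w, grad_out):
    # dL/dw = sum_ij x_ij * dy_ij -- the sumOfProducts/sumCols pair above
    grad_w = np.sum(x * grad_out)
    # dL/db = sum_ij dy_ij -- the sumRows/sumCols pair above
    grad_b = np.sum(grad_out)
    # dL/dx_ij = w * dy_ij -- inG->add(*outG, scaleValue) above
    grad_x = w * grad_out
    return grad_x, grad_w, grad_b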
15 changes: 15 additions & 0 deletions paddle/gserver/tests/test_LayerGrad.cpp
Expand Up @@ -2007,6 +2007,21 @@ TEST(Layer, RowL2NormLayer) {
}
}

TEST(Layer, ScaleShiftLayer) {
  const size_t batchSize = 16;
  const size_t size = 32;
  TestConfig config;
  config.layerConfig.set_type("scale_shift");
  config.layerConfig.set_size(size);
  config.biasSize = 1;
  config.inputDefs.push_back(
      {INPUT_DATA, "input", /* dim= */ size, /* paraSize= */ 1});
  config.layerConfig.add_inputs();
  for (auto useGpu : {false, true}) {
    testLayerGrad(config, "scale_shift", batchSize, false, useGpu, false);
  }
}

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);
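testLayerGrad checks the analytic gradients against numerical ones. The idea, reduced to a minimal NumPy sketch that reuses the illustrative functions above (this is not the PaddlePaddle API):

def check_scale_grad(x, w, b, eps=1e-4):
    # With loss = sum(y), dL/dy is a matrix of ones.
    grad_out = np.ones_like(x)
    _, grad_w, _ = scale_shift_backward(x, w, grad_out)

    # Central finite difference on the scalar scale w.
    def loss(wv):
        return np.sum(scale_shift_forward(x, wv, b))

    numeric_grad_w = (loss(w + eps) - loss(w - eps)) / (2 * eps)
    assert abs(grad_w - numeric_grad_w) < 1e-3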
14 changes: 14 additions & 0 deletions python/paddle/trainer/config_parser.py
@@ -2232,6 +2232,20 @@ def __init__(self, name, inputs, min, max, **xargs):
        self.config.inputs[0].clip_conf.max = max


@config_layer('scale_shift')
class ScaleShiftLayer(LayerBase):
    def __init__(self, name, inputs, bias=True, **xargs):
        super(ScaleShiftLayer, self).__init__(
            name, 'scale_shift', 0, inputs=inputs, **xargs)
        config_assert(
            len(self.inputs) == 1,
            'ScaleShiftLayer must have one and only one input.')
        input_layer = self.get_input_layer(0)
        self.set_layer_size(input_layer.size)
        self.create_input_parameter(0, 1, [1, 1])
        self.create_bias_parameter(bias, 1)


# key: cost type
# value: cost class
g_cost_map = {}
42 changes: 42 additions & 0 deletions python/paddle/trainer_config_helpers/layers.py
Expand Up @@ -133,6 +133,7 @@
'clip_layer',
'slice_projection',
'kmax_sequence_score_layer',
'scale_shift_layer',
]


@@ -230,6 +231,7 @@ class LayerType(object):
    CLIP_LAYER = 'clip'

    KMAX_SEQ_SCORE = 'kmax_seq_score'
    SCALE_SHIFT_LAYER = 'scale_shift'

    @staticmethod
    def is_layer_type(type_name):
@@ -6210,3 +6212,43 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):

    return LayerOutput(
        name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size)


@wrap_name_default("scale_shift")
@wrap_param_attr_default()
@wrap_bias_attr_default()
def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None):
    """
    A layer that applies a linear transformation to each element in each row
    of the input matrix. For each element, the layer first re-scales it and
    then adds a bias to it.

    This layer is very similar to the SlopeInterceptLayer, except that the
    scale and bias are trainable.

    .. math::

        y = w * x + b

    The example usage is:

    .. code-block:: python

        scale_shift = scale_shift_layer(input=input_layer, bias_attr=False)

    :param name: The layer name.
    :type name: basestring
    :param input: The input layer.
    :type input: LayerOutput
    :param param_attr: The parameter attribute of scaling.
    :type param_attr: ParameterAttribute
    :param bias_attr: The parameter attribute of shifting.
    :type bias_attr: ParameterAttribute
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    Layer(
        name=name,
        type=LayerType.SCALE_SHIFT_LAYER,
        inputs=Input(input.name, **param_attr.attr),
        bias=ParamAttr.to_bias(bias_attr))
    return LayerOutput(
        name, LayerType.SCALE_SHIFT_LAYER, parents=[input], size=input.size)
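A usage sketch for the new helper (illustrative only; the ParamAttr initialization below, which starts w at 1 so the layer begins as the identity, is an assumption and not taken from this commit):

from paddle.trainer_config_helpers import *

data = data_layer(name='data', size=100)

# Scale-only variant: y = w * x, with the bias disabled.
scale_only = scale_shift_layer(input=data, bias_attr=False)

# Scale and shift, with an explicit (assumed) initialization for w.
scale_and_shift = scale_shift_layer(
    input=data,
    param_attr=ParamAttr(initial_mean=1.0, initial_std=0.0))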
@@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_kmax_seq_socre_layer test_seq_select_layers)
test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer)

export whole_configs=(test_split_datasource)
@@ -0,0 +1,72 @@
type: "nn"
layers {
name: "data"
type: "data"
size: 100
active_type: ""
}
layers {
name: "__scale_shift_0__"
type: "scale_shift"
size: 100
active_type: ""
inputs {
input_layer_name: "data"
input_parameter_name: "___scale_shift_0__.w0"
}
}
layers {
name: "__scale_shift_1__"
type: "scale_shift"
size: 100
active_type: ""
inputs {
input_layer_name: "data"
input_parameter_name: "___scale_shift_1__.w0"
}
bias_parameter_name: "___scale_shift_1__.wbias"
}
parameters {
name: "___scale_shift_0__.w0"
size: 1
initial_mean: 0.0
initial_std: 1.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___scale_shift_1__.w0"
size: 1
initial_mean: 0.0
initial_std: 1.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___scale_shift_1__.wbias"
size: 1
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: false
}
input_layer_names: "data"
output_layer_names: "__scale_shift_0__"
output_layer_names: "__scale_shift_1__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__scale_shift_0__"
layer_names: "__scale_shift_1__"
input_layer_names: "data"
output_layer_names: "__scale_shift_0__"
output_layer_names: "__scale_shift_1__"
is_recurrent_layer_group: false
}

@@ -0,0 +1,9 @@
from paddle.trainer_config_helpers import *

data = data_layer(name='data', size=100)

scale = scale_shift_layer(input=data, bias_attr=False)

scale_shift = scale_shift_layer(input=data)

outputs(scale, scale_shift)
