Merge pull request #3560 from guoshengCS/add-ShiftLayer
Add ScaleShiftLayer
guoshengCS authored Aug 21, 2017
2 parents 9871c6d + f6dc56a commit b7a6cc9
Showing 8 changed files with 265 additions and 1 deletion.
5 changes: 5 additions & 0 deletions doc/api/v2/config/layer.rst
@@ -362,6 +362,11 @@ trans
.. autoclass:: paddle.v2.layer.trans
    :noindex:

scale_shift
-----------
.. autoclass:: paddle.v2.layer.scale_shift
    :noindex:

Sampling Layers
===============

107 changes: 107 additions & 0 deletions paddle/gserver/layers/ScaleShiftLayer.cpp
@@ -0,0 +1,107 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "Layer.h"

namespace paddle {

/**
 * A layer that applies a linear transformation to each element in each row
 * of the input matrix. For each element, the layer first re-scales it and
 * then adds a bias to it.
 *
 * \f[
 *    y = wx + b
 * \f]
 *
 * Here, w is the scale and b is the bias. Both w and b are trainable scalars.
 *
 */

class ScaleShiftLayer : public Layer {
protected:
  std::unique_ptr<Weight> scale_;
  std::unique_ptr<Weight> offset_;

public:
  explicit ScaleShiftLayer(const LayerConfig& config) : Layer(config) {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback = nullptr) override;
};

REGISTER_LAYER(scale_shift, ScaleShiftLayer);

bool ScaleShiftLayer::init(const LayerMap& layerMap,
                           const ParameterMap& parameterMap) {
  Layer::init(layerMap, parameterMap);
  CHECK_EQ(inputLayers_.size(), 1U);
  scale_.reset(new Weight(1, 1, parameters_[0]));
  if (biasParameter_.get() != NULL) {
    offset_ = std::unique_ptr<Weight>(new Weight(1, 1, biasParameter_));
  }
  return true;
}

void ScaleShiftLayer::forward(PassType passType) {
  Layer::forward(passType);

  MatrixPtr inV = getInputValue(0);
  resetOutput(inV->getHeight(), inV->getWidth());
  MatrixPtr outV = getOutputValue();
  real scaleValue = scale_->getW()->getElement(0, 0);
  outV->mulScalar(*inV, scaleValue);
  if (offset_) {
    real offsetValue = offset_->getW()->getElement(0, 0);
    outV->add(offsetValue);
  }
}

void ScaleShiftLayer::backward(const UpdateCallback& callback) {
  MatrixPtr inV = getInputValue(0);
  MatrixPtr inG = getInputGrad(0);
  MatrixPtr outV = getOutputValue();
  MatrixPtr outG = getOutputGrad();

  /* Calculate the parameter gradient for the current layer */
  if (scale_->getWGrad()) {
    MatrixPtr rowSumMtx;
    Matrix::resizeOrCreate(rowSumMtx, outG->getHeight(), 1, false, useGpu_);
    // this_i = scaleDest * this_i + scaleSum * \sum_j b_{ij} * c_{ij}
    rowSumMtx->sumOfProducts(
        /* b= */ *inV, /* c= */ *outG, /* scaleSum= */ 1, /* scaleDest= */ 0.);
    // this_i = scaleDest * this_i + scaleSum * \sum_j b_{ji}
    scale_->getWGrad()->sumCols(
        /* b= */ *rowSumMtx, /* scaleSum= */ 1., /* scaleDest= */ 1.);
    scale_->getParameterPtr()->incUpdate(callback);
  }
  if (offset_ && offset_->getWGrad()) {
    MatrixPtr rowSumMtx;
    Matrix::resizeOrCreate(rowSumMtx, outG->getHeight(), 1, false, useGpu_);
    rowSumMtx->sumRows(*outG, 1., 0.);
    offset_->getWGrad()->sumCols(*rowSumMtx, 1., 1.);
    offset_->getParameterPtr()->incUpdate(callback);
  }

  /* Calculate the input layer's error */
  if (inG) {
    real scaleValue = scale_->getW()->getElement(0, 0);
    inG->add(*outG, scaleValue);
  }
}

} // namespace paddle
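For reference, the math implemented by the forward and backward passes above can be sketched in NumPy as follows (a minimal illustration with made-up names, not part of the commit):

import numpy as np

def scale_shift_forward(x, w, b):
    # y_ij = w * x_ij + b, applied elementwise to the input matrix
    return w * x + b

def scale_shift_backward(x, w, grad_out):
    # dL/dw = sum_ij x_ij * dy_ij -- the sumOfProducts/sumCols pair above
    grad_w = np.sum(x * grad_out)
    # dL/db = sum_ij dy_ij -- the sumRows/sumCols pair above
    grad_b = np.sum(grad_out)
    # dL/dx_ij = w * dy_ij -- inG->add(*outG, scaleValue) above
    grad_x = w * grad_out
    return grad_x, grad_w, grad_b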
15 changes: 15 additions & 0 deletions paddle/gserver/tests/test_LayerGrad.cpp
Expand Up @@ -2007,6 +2007,21 @@ TEST(Layer, RowL2NormLayer) {
}
}

TEST(Layer, ScaleShiftLayer) {
  const size_t batchSize = 16;
  const size_t size = 32;
  TestConfig config;
  config.layerConfig.set_type("scale_shift");
  config.layerConfig.set_size(size);
  config.biasSize = 1;
  config.inputDefs.push_back(
      {INPUT_DATA, "input", /* dim= */ size, /* paraSize= */ 1});
  config.layerConfig.add_inputs();
  for (auto useGpu : {false, true}) {
    testLayerGrad(config, "scale_shift", batchSize, false, useGpu, false);
  }
}

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);
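testLayerGrad checks the analytic gradients against numerical ones. The idea, reduced to a minimal NumPy sketch that reuses the illustrative functions above (this is not the PaddlePaddle API):

def check_scale_grad(x, w, b, eps=1e-4):
    # With loss = sum(y), dL/dy is a matrix of ones.
    grad_out = np.ones_like(x)
    _, grad_w, _ = scale_shift_backward(x, w, grad_out)

    # Central finite difference on the scalar scale w.
    def loss(wv):
        return np.sum(scale_shift_forward(x, wv, b))

    numeric_grad_w = (loss(w + eps) - loss(w - eps)) / (2 * eps)
    assert abs(grad_w - numeric_grad_w) < 1e-3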
14 changes: 14 additions & 0 deletions python/paddle/trainer/config_parser.py
@@ -2232,6 +2232,20 @@ def __init__(self, name, inputs, min, max, **xargs):
        self.config.inputs[0].clip_conf.max = max


@config_layer('scale_shift')
class ScaleShiftLayer(LayerBase):
    def __init__(self, name, inputs, bias=True, **xargs):
        super(ScaleShiftLayer, self).__init__(
            name, 'scale_shift', 0, inputs=inputs, **xargs)
        config_assert(
            len(self.inputs) == 1,
            'ScaleShiftLayer must have one and only one input.')
        input_layer = self.get_input_layer(0)
        self.set_layer_size(input_layer.size)
        self.create_input_parameter(0, 1, [1, 1])
        self.create_bias_parameter(bias, 1)


# key: cost type
# value: cost class
g_cost_map = {}
42 changes: 42 additions & 0 deletions python/paddle/trainer_config_helpers/layers.py
Expand Up @@ -133,6 +133,7 @@
'clip_layer',
'slice_projection',
'kmax_sequence_score_layer',
'scale_shift_layer',
]


@@ -230,6 +231,7 @@ class LayerType(object):
    CLIP_LAYER = 'clip'

    KMAX_SEQ_SCORE = 'kmax_seq_score'
    SCALE_SHIFT_LAYER = 'scale_shift'

    @staticmethod
    def is_layer_type(type_name):
@@ -6210,3 +6212,43 @@ def kmax_sequence_score_layer(input, name=None, beam_size=1):

    return LayerOutput(
        name, LayerType.KMAX_SEQ_SCORE, parents=[input], size=input.size)


@wrap_name_default("scale_shift")
@wrap_param_attr_default()
@wrap_bias_attr_default()
def scale_shift_layer(input, name=None, param_attr=None, bias_attr=None):
    """
    A layer that applies a linear transformation to each element in each row
    of the input matrix. For each element, the layer first re-scales it and
    then adds a bias to it.

    This layer is very similar to the SlopeInterceptLayer, except that the
    scale and bias are trainable.

    .. math::

        y = w * x + b

    The example usage is:

    .. code-block:: python

        scale_shift = scale_shift_layer(input=input_layer, bias_attr=False)

    :param name: The layer name.
    :type name: basestring
    :param input: The input layer.
    :type input: LayerOutput
    :param param_attr: The parameter attribute of scaling.
    :type param_attr: ParameterAttribute
    :param bias_attr: The parameter attribute of shifting.
    :type bias_attr: ParameterAttribute
    :return: LayerOutput object.
    :rtype: LayerOutput
    """
    Layer(
        name=name,
        type=LayerType.SCALE_SHIFT_LAYER,
        inputs=Input(input.name, **param_attr.attr),
        bias=ParamAttr.to_bias(bias_attr))
    return LayerOutput(
        name, LayerType.SCALE_SHIFT_LAYER, parents=[input], size=input.size)
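A usage sketch for the new helper (illustrative only; the ParamAttr initialization below, which starts w at 1 so the layer begins as the identity, is an assumption and not taken from this commit):

from paddle.trainer_config_helpers import *

data = data_layer(name='data', size=100)

# Scale-only variant: y = w * x, with the bias disabled.
scale_only = scale_shift_layer(input=data, bias_attr=False)

# Scale and shift, with an explicit (assumed) initialization for w.
scale_and_shift = scale_shift_layer(
    input=data,
    param_attr=ParamAttr(initial_mean=1.0, initial_std=0.0))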
@@ -8,6 +8,6 @@ test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
test_recursive_topology test_gated_unit_layer test_clip_layer test_row_l2_norm_layer
test_kmax_seq_socre_layer test_seq_select_layers)
test_kmax_seq_socre_layer test_seq_select_layers test_scale_shift_layer)

export whole_configs=(test_split_datasource)
@@ -0,0 +1,72 @@
type: "nn"
layers {
name: "data"
type: "data"
size: 100
active_type: ""
}
layers {
name: "__scale_shift_0__"
type: "scale_shift"
size: 100
active_type: ""
inputs {
input_layer_name: "data"
input_parameter_name: "___scale_shift_0__.w0"
}
}
layers {
name: "__scale_shift_1__"
type: "scale_shift"
size: 100
active_type: ""
inputs {
input_layer_name: "data"
input_parameter_name: "___scale_shift_1__.w0"
}
bias_parameter_name: "___scale_shift_1__.wbias"
}
parameters {
name: "___scale_shift_0__.w0"
size: 1
initial_mean: 0.0
initial_std: 1.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___scale_shift_1__.w0"
size: 1
initial_mean: 0.0
initial_std: 1.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: true
}
parameters {
name: "___scale_shift_1__.wbias"
size: 1
initial_mean: 0.0
initial_std: 0.0
dims: 1
dims: 1
initial_strategy: 0
initial_smart: false
}
input_layer_names: "data"
output_layer_names: "__scale_shift_0__"
output_layer_names: "__scale_shift_1__"
sub_models {
name: "root"
layer_names: "data"
layer_names: "__scale_shift_0__"
layer_names: "__scale_shift_1__"
input_layer_names: "data"
output_layer_names: "__scale_shift_0__"
output_layer_names: "__scale_shift_1__"
is_recurrent_layer_group: false
}

@@ -0,0 +1,9 @@
from paddle.trainer_config_helpers import *

data = data_layer(name='data', size=100)

scale = scale_shift_layer(input=data, bias_attr=False)

scale_shift = scale_shift_layer(input=data)

outputs(scale, scale_shift)
