Merge pull request #4859 from will-am/factorization_machine_layer
Add Factorization Machine Layer
will-am authored Nov 27, 2017
2 parents ef3420e + 8a283db commit 95cdbfe
Showing 12 changed files with 445 additions and 5 deletions.
15 changes: 11 additions & 4 deletions doc/api/v2/config/layer.rst
@@ -54,7 +54,7 @@ img_conv

.. _api_v2.layer_context_projection:

context_projection
------------------
.. autoclass:: paddle.v2.layer.context_projection
:noindex:
@@ -70,7 +70,7 @@ Image Pooling Layer
img_pool
--------
.. autoclass:: paddle.v2.layer.img_pool
:noindex:

spp
---
@@ -104,7 +104,7 @@ sum_to_one_norm
---------------
.. autoclass:: paddle.v2.layer.sum_to_one_norm
:noindex:

cross_channel_norm
------------------
.. autoclass:: paddle.v2.layer.cross_channel_norm
@@ -114,7 +114,7 @@ row_l2_norm
-----------
.. autoclass:: paddle.v2.layer.row_l2_norm
:noindex:

Recurrent Layers
================

@@ -415,6 +415,13 @@ multiplex
.. autoclass:: paddle.v2.layer.multiplex
:noindex:

Factorization Machine Layer
============================

factorization_machine
---------------------
.. autoclass:: paddle.v2.layer.factorization_machine
:noindex:

Slicing and Joining Layers
==========================
158 changes: 158 additions & 0 deletions paddle/gserver/layers/FactorizationMachineLayer.cpp
@@ -0,0 +1,158 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "FactorizationMachineLayer.h"
#include <algorithm>
#include <vector>
#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"

namespace paddle {

REGISTER_LAYER(factorization_machine, FactorizationMachineLayer);

bool FactorizationMachineLayer::init(const LayerMap& layerMap,
const ParameterMap& parameterMap) {
/* Initialize the basic parent class */
Layer::init(layerMap, parameterMap);

factorSize_ = config_.factor_size();

/* initialize the latentVectors_ */
CHECK_EQ(inputLayers_.size(), 1UL);
size_t inputSize = inputLayers_[0]->getSize();
CHECK_EQ(parameters_[0]->getSize(), inputSize * factorSize_);
latentVectors_ = std::unique_ptr<Weight>(
new Weight(inputSize, factorSize_, parameters_[0]));

return true;
}

void FactorizationMachineLayer::forward(PassType passType) {
Layer::forward(passType);

const MatrixPtr& inputV = getInputValue(0);

size_t batchSize = inputV->getHeight();
size_t outputSize = getSize();
size_t inputSize = inputLayers_[0]->getSize();
reserveOutput(batchSize, outputSize);

MatrixPtr outV = getOutputValue();

Matrix::resizeOrCreate(
latentVectorsSquare_, inputSize, factorSize_, false, useGpu_);
Matrix::resizeOrCreate(
inputMulFactor_, batchSize, factorSize_, false, useGpu_);
Matrix::resizeOrCreate(tmpOut_, batchSize, factorSize_, false, useGpu_);

REGISTER_TIMER_INFO("FmInputMulFactorTimer", getName().c_str());
inputMulFactor_->mul(*inputV, *latentVectors_->getW());
inputMulFactor_->square2(*tmpOut_);
outV->sumRows(*tmpOut_, 0.5, 0);

if (dynamic_cast<CpuSparseMatrix*>(inputV.get())) {
Matrix::resizeOrCreateSparseMatrix(inputSquare_,
inputV->getHeight(),
inputV->getWidth(),
inputV->getElementCnt(),
inputV->getValueType());
inputSquare_->copyFrom(*inputV);
(dynamic_cast<CpuSparseMatrix*>(inputSquare_.get()))->square2();
} else {
Matrix::resizeOrCreate(
inputSquare_, inputV->getHeight(), inputV->getWidth(), false, useGpu_);
inputV->square2(*inputSquare_);
}
latentVectors_->getW()->square2(*latentVectorsSquare_);
tmpOut_->mul(*inputSquare_, *latentVectorsSquare_);
outV->sumRows(*tmpOut_, -0.5, 1.0);
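
// Derivation note: the two sumRows calls above implement the standard
// O(k*n) reformulation of the pairwise interaction term (Rendle, ICDM
// 2010), avoiding the naive O(k*n^2) double loop:
//   sum_{i<j} <v_i, v_j> x_i x_j
//     = 0.5 * sum_f [ (sum_i v_{i,f} x_i)^2 - sum_i v_{i,f}^2 x_i^2 ]
// The first term comes from squaring inputMulFactor_ (scaled by 0.5); the
// second from inputSquare_ * latentVectorsSquare_ (scaled by -0.5).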

/* activation */ {
REGISTER_TIMER_INFO("FmFwAtvTimer", getName().c_str());
forwardActivation();
}
}

void FactorizationMachineLayer::backward(const UpdateCallback& callback) {
/* Do derivation */ { backwardActivation(); }

const MatrixPtr& inputV = getInputValue(0);
const MatrixPtr& oGrad = getOutputGrad();

Matrix::resizeOrCreate(
tmpSum_, 1, latentVectors_->getW()->getHeight(), false, useGpu_);
MatrixPtr tmpSumTrans = Matrix::create(tmpSum_->getRowBuf(0),
latentVectors_->getW()->getHeight(),
1,
false,
useGpu_);

/* Calculate the gradients of the latentVectors_ matrix */
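// Per element (following Rendle, ICDM 2010), writing s_f = sum_j v_{j,f} x_j:
//   dy/dv_{i,f} = x_i * s_f - v_{i,f} * x_i^2
// Each sample's contribution below is additionally scaled by the output
// gradient oGrad (via rowScale) before being accumulated into the weight
// gradient.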
if (latentVectors_->getWGrad()) {
if (dynamic_cast<CpuSparseMatrix*>(inputV.get())) {
Matrix::resizeOrCreateSparseMatrix(tmpInput_,
inputV->getHeight(),
inputV->getWidth(),
inputV->getElementCnt());

CpuSparseMatrix* sparseInputV =
dynamic_cast<CpuSparseMatrix*>(inputV.get());
CpuSparseMatrix* sparseInputSquare =
dynamic_cast<CpuSparseMatrix*>(inputSquare_.get());
CpuSparseMatrix* sparseTmpInput =
dynamic_cast<CpuSparseMatrix*>(tmpInput_.get());
sparseTmpInput->copyFrom(*sparseInputV);

sparseTmpInput->rowScale(0, *sparseInputV, *oGrad);
latentVectors_->getWGrad()->mul(
*sparseTmpInput->getTranspose(), *inputMulFactor_, 1, 1);
sparseTmpInput->rowScale(0, *sparseInputSquare, *oGrad);

Matrix::resizeOrCreate(negOnes_, 1, inputV->getHeight(), false, useGpu_);
negOnes_->zeroMem();
negOnes_->add(-1);
tmpSum_->mul(*negOnes_, *sparseTmpInput, 1, 0);
} else {
Matrix::resizeOrCreate(
tmpInput_, inputV->getHeight(), inputV->getWidth(), false, useGpu_);

tmpInput_->rowScale(0, *inputV, *oGrad);
latentVectors_->getWGrad()->mul(
*tmpInput_->getTranspose(), *inputMulFactor_, 1, 1);
tmpInput_->rowScale(0, *inputSquare_, *oGrad);

tmpSum_->sumCols(*tmpInput_, -1, 0);
}

latentVectors_->getWGrad()->addRowScale(
0, *latentVectors_->getW(), *tmpSumTrans);

/* Increment the gradient update counter for the parameter */
latentVectors_->getParameterPtr()->incUpdate(callback);
}

/* Calculate the input layer's gradient */
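// Per element, with s_f = sum_j v_{j,f} x_j as above:
//   dy/dx_i = sum_f [ v_{i,f} * s_f - v_{i,f}^2 * x_i ]
// computed as inputMulFactor_ * V^T, corrected via addColScale with the
// negated row sums of latentVectorsSquare_, then scaled row-wise by oGrad.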
MatrixPtr inGrad = getInputGrad(0);
if (inGrad != NULL) {
inGrad->mul(
*inputMulFactor_, *latentVectors_->getW()->getTranspose(), 1, 1);
tmpSumTrans->sumRows(*latentVectorsSquare_, -1, 0);
inGrad->addColScale(0, *inputV, *tmpSum_);
inGrad->rowScale(0, *inGrad, *oGrad);
}
}

} // namespace paddle
80 changes: 80 additions & 0 deletions paddle/gserver/layers/FactorizationMachineLayer.h
@@ -0,0 +1,80 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/ThreadLocal.h"

namespace paddle {
/**
* @brief The Factorization Machine models pairwise (order-2) feature
* interactions as the inner product of the learned latent vectors
* corresponding to each input feature.
*
* The Factorization Machine can effectively capture feature interactions,
* especially when the input is sparse. While in principle FM can model
* higher-order feature interactions, in practice usually only order-2
* interactions are considered. The Factorization Machine Layer here only
* computes the order-2 interactions with the formula:
*
* \f[
* y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \rangle x_i x_j
* \f]
*
* The detailed calculations for the forward and backward passes can be found
* in the paper:
*
* S. Rendle. Factorization Machines. In IEEE ICDM, 2010.
*
* The config file api is factorization_machine.
*/

class FactorizationMachineLayer : public Layer {
protected:
// The latent vectors, shape: (size, factorSize_)
// Each row of the latentVectors_ matrix is the latent vector
// corresponding to one input feature dimension
std::unique_ptr<Weight> latentVectors_;
// The hyperparameter that defines the dimensionality of the factorization
size_t factorSize_;

private:
// Store the squared values of the latent vectors matrix
MatrixPtr latentVectorsSquare_;
// Store the squared values of the input matrix
MatrixPtr inputSquare_;
// The result of input matrix * latent vectors matrix, used in both the
// forward and the backward steps
MatrixPtr inputMulFactor_;
// Store temporary calculation results
MatrixPtr tmpOut_;
MatrixPtr tmpSum_;
MatrixPtr tmpInput_;
// A 1 x batchSize row vector of -1s, used to negate row sums in the
// sparse-input gradient computation
MatrixPtr negOnes_;

public:
explicit FactorizationMachineLayer(const LayerConfig& config)
: Layer(config) {}
~FactorizationMachineLayer() {}

bool init(const LayerMap& layerMap,
const ParameterMap& parameterMap) override;

void forward(PassType passType) override;
void backward(const UpdateCallback& callback = nullptr) override;
};

} // namespace paddle
19 changes: 19 additions & 0 deletions paddle/gserver/tests/test_LayerGrad.cpp
@@ -2464,6 +2464,25 @@ TEST(Layer, L2DistanceLayer) {
}
}

void testFactorizationMachineLayer(InputType type, bool useGpu) {
const int FACTOR_SIZE = 10;
TestConfig config;
config.layerConfig.set_type("factorization_machine");
config.layerConfig.set_factor_size(FACTOR_SIZE);
config.layerConfig.set_size(1);
config.biasSize = 0;
config.inputDefs.push_back({type, "layer_0", 128, 1280});
config.layerConfig.add_inputs();
testLayerGrad(config, "factorization_machine", 16, false, useGpu, false);
}

TEST(Layer, FactorizationMachineLayer) {
for (auto useGpu : {false, true}) {
testFactorizationMachineLayer(INPUT_DATA, useGpu);
}
testFactorizationMachineLayer(INPUT_SPARSE_FLOAT_VALUE_DATA, false);
}

int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
initMain(argc, argv);
29 changes: 29 additions & 0 deletions paddle/math/CpuSparseMatrix.cpp
@@ -260,6 +260,35 @@ void CpuSparseMatrix::printOneRow(std::ostream& os, size_t idx) const {
os << ";";
}

void CpuSparseMatrix::rowScale(size_t cCol, CpuSparseMatrix& b, Matrix& c) {
CHECK(getFormat() != SPARSE_CSC) << "Not supported";
CHECK_EQ(height_, b.getHeight());
CHECK_EQ(width_, b.getWidth());
real* A = getValue();
real* B = b.getValue();
if (b.getValueType() == FLOAT_VALUE) {
for (size_t i = 0; i < height_; i++) {
size_t start = getRowStartIdx(i);
size_t end = getRowStartIdx(i + 1);
CHECK_EQ(start, b.getRowStartIdx(i));
CHECK_EQ(end, b.getRowStartIdx(i + 1));
for (size_t j = start; j < end; j++) {
A[j] = B[j] * c.getElement(i, cCol);
}
}
} else if (b.getValueType() == NO_VALUE) {
for (size_t i = 0; i < height_; i++) {
size_t start = getRowStartIdx(i);
size_t end = getRowStartIdx(i + 1);
CHECK_EQ(start, b.getRowStartIdx(i));
CHECK_EQ(end, b.getRowStartIdx(i + 1));
for (size_t j = start; j < end; j++) {
A[j] = c.getElement(i, cCol);
}
}
}
}

void CpuSparseMatrix::randomizeUniform() {
CHECK_LE(elementCnt_, height_ * width_);
if (valueType_ == FLOAT_VALUE) {
9 changes: 9 additions & 0 deletions paddle/math/CpuSparseMatrix.h
@@ -239,6 +239,15 @@ class CpuSparseMatrix : public Matrix {
const unsigned int* cols,
const real* values);

/**
* @brief For each row i: this[i][j] = b[i][j] * c(i, cCol), i.e., scale
* every nonzero in row i of the sparse matrix b by the scalar c(i, cCol)
* and store the result in this matrix.
*
* @param[in] cCol the column of matrix c used to scale each row of b
* @param[in] b CpuSparseMatrix
* @param[in] c Matrix
*/
void rowScale(size_t cCol, CpuSparseMatrix& b, Matrix& c);
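// Worked example (illustrative): if b's nonzeros are {(0,2)=3.0, (1,5)=4.0}
// and c(0,cCol)=2.0, c(1,cCol)=0.5, then after rowScale(cCol, b, c) this
// matrix holds {(0,2)=6.0, (1,5)=2.0} with the same sparsity pattern.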

void randomizeUniform();

void copyFrom(const GpuSparseMatrix& src, hl_stream_t stream);
3 changes: 3 additions & 0 deletions proto/ModelConfig.proto
@@ -544,6 +544,9 @@ message LayerConfig {
// for batch normalization layer
// The small constant added to the variance to improve numeric stability.
optional double epsilon = 60 [ default = 0.00001 ];

// for factorization machine layer
optional uint32 factor_size = 61;
}

message EvaluatorConfig {
15 changes: 15 additions & 0 deletions python/paddle/trainer/config_parser.py
@@ -3870,6 +3870,21 @@ def __init__(self, name, inputs, value, **xargs):
image_conf.channels)


@config_layer('factorization_machine')
class FactorizationMachineLayer(LayerBase):
def __init__(self, name, inputs, factor_size, **xargs):
super(FactorizationMachineLayer, self).__init__(
name, 'factorization_machine', size=1, inputs=inputs, **xargs)
config_assert(
len(self.inputs) == 1,
'factorization machine layer must have one and only one input.')
self.config.factor_size = factor_size
input_layer = self.get_input_layer(0)
psize = input_layer.size * factor_size
dims = [input_layer.size, factor_size]
self.create_input_parameter(0, psize, dims)
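
# A hypothetical usage sketch from a v2 trainer config; the wrapper's exact
# keyword names below follow the usual paddle.v2.layer conventions and are
# assumptions, not taken from this diff:
#
#   fm = paddle.v2.layer.factorization_machine(
#       input=paddle.v2.layer.data(
#           name='features',
#           type=paddle.v2.data_type.sparse_vector(128)),
#       factor_size=10,
#       act=paddle.v2.activation.Linear(),
#       param_attr=paddle.v2.attr.Param(initial_std=0.01))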


# Deprecated, use a new layer specific class instead
@config_func
def Layer(name, type, **xargs):