Add Factorization Machine Layer #4859
FactorizationMachineLayer.cpp
@@ -0,0 +1,158 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "FactorizationMachineLayer.h"
#include <algorithm>
#include <vector>
#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"

namespace paddle {

REGISTER_LAYER(factorization_machine, FactorizationMachineLayer);

bool FactorizationMachineLayer::init(const LayerMap& layerMap,
                                     const ParameterMap& parameterMap) {
  /* Initialize the basic parent class */
  Layer::init(layerMap, parameterMap);

  factorSize_ = config_.factor_size();

  /* Initialize the latentVectors_ */
  CHECK_EQ(inputLayers_.size(), 1UL);
  size_t inputSize = inputLayers_[0]->getSize();
  CHECK_EQ(parameters_[0]->getSize(), inputSize * factorSize_);
  latentVectors_ = std::unique_ptr<Weight>(
      new Weight(inputSize, factorSize_, parameters_[0]));

  return true;
}

void FactorizationMachineLayer::forward(PassType passType) {
  Layer::forward(passType);
[Review comment] Running on GPU is not supported; please add a check here and report an error.
  const MatrixPtr& inputV = getInputValue(0);

  size_t batchSize = inputV->getHeight();
  size_t outputSize = getSize();
  size_t inputSize = inputLayers_[0]->getSize();
  reserveOutput(batchSize, outputSize);

  MatrixPtr outV = getOutputValue();

  Matrix::resizeOrCreate(
      latentVectorsSquare_, inputSize, factorSize_, false, useGpu_);
  Matrix::resizeOrCreate(
      inputMulFactor_, batchSize, factorSize_, false, useGpu_);
  Matrix::resizeOrCreate(tmpOut_, batchSize, factorSize_, false, useGpu_);

  REGISTER_TIMER_INFO("FmInputMulFactorTimer", getName().c_str());
  inputMulFactor_->mul(*inputV, *latentVectors_->getW());
  inputMulFactor_->square2(*tmpOut_);
  outV->sumRows(*tmpOut_, 0.5, 0);

  if (dynamic_cast<CpuSparseMatrix*>(inputV.get())) {
    Matrix::resizeOrCreateSparseMatrix(inputSquare_,
                                       inputV->getHeight(),
                                       inputV->getWidth(),
                                       inputV->getElementCnt(),
                                       inputV->getValueType());
    inputSquare_->copyFrom(*inputV);
    (dynamic_cast<CpuSparseMatrix*>(inputSquare_.get()))->square2();
  } else {
    Matrix::resizeOrCreate(
        inputSquare_, inputV->getHeight(), inputV->getWidth(), false, useGpu_);
    inputV->square2(*inputSquare_);
  }
  latentVectors_->getW()->square2(*latentVectorsSquare_);
  tmpOut_->mul(*inputSquare_, *latentVectorsSquare_);
  outV->sumRows(*tmpOut_, -0.5, 1.0);

  /* activation */ {
    REGISTER_TIMER_INFO("FmFwAtvTimer", getName().c_str());
    forwardActivation();
[Review comment] Can the FM layer take a nonlinear activation? If that is not valid in principle (I recall it is not; please double-check), this can be removed. If it is allowed, keep it.
[Reply] A nonlinear activation can be added.
[Reply] This only computes the order-2 interaction term; do you mean it is also fine to apply a nonlinear activation A to the order-2 term and a nonlinear activation B to the first-order term?
[Reply] I have not seen it used that way, but in theory it should work.
  }
}
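For reference, the forward pass above evaluates the pairwise interaction term via the O(kn) identity from the cited paper (with X the batch input and V the latent vector matrix, so that inputMulFactor_ = XV, inputSquare_ is X squared elementwise, and latentVectorsSquare_ is V squared elementwise):

\f[
\sum_{i=1}^{n-1}\sum_{j=i+1}^{n}\langle v_i, v_j\rangle x_i x_j
= \frac{1}{2}\sum_{f=1}^{k}\left[\Big(\sum_{i=1}^{n} v_{i,f}\, x_i\Big)^2 - \sum_{i=1}^{n} v_{i,f}^2\, x_i^2\right]
\f]

The first outV->sumRows call accumulates the squared-sum term with scale 0.5; the second subtracts the row-wise sums of the elementwise squares product with scale -0.5.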

void FactorizationMachineLayer::backward(const UpdateCallback& callback) {
  /* Do derivation */ { backwardActivation(); }

  const MatrixPtr& inputV = getInputValue(0);
  const MatrixPtr& oGrad = getOutputGrad();

  Matrix::resizeOrCreate(
      tmpSum_, 1, latentVectors_->getW()->getHeight(), false, useGpu_);
  MatrixPtr tmpSumTrans = Matrix::create(tmpSum_->getRowBuf(0),
                                         latentVectors_->getW()->getHeight(),
                                         1,
                                         false,
                                         useGpu_);

  /* Calculate the gradients of the latentVectors_ matrix */
  if (latentVectors_->getWGrad()) {
    if (dynamic_cast<CpuSparseMatrix*>(inputV.get())) {
      Matrix::resizeOrCreateSparseMatrix(tmpInput_,
                                         inputV->getHeight(),
                                         inputV->getWidth(),
                                         inputV->getElementCnt());

      CpuSparseMatrix* sparseInputV =
          dynamic_cast<CpuSparseMatrix*>(inputV.get());
      CpuSparseMatrix* sparseInputSquare =
          dynamic_cast<CpuSparseMatrix*>(inputSquare_.get());
      CpuSparseMatrix* sparseTmpInput =
          dynamic_cast<CpuSparseMatrix*>(tmpInput_.get());
      sparseTmpInput->copyFrom(*sparseInputV);

      sparseTmpInput->rowScale(0, *sparseInputV, *oGrad);
      latentVectors_->getWGrad()->mul(
          *sparseTmpInput->getTranspose(), *inputMulFactor_, 1, 1);
      sparseTmpInput->rowScale(0, *sparseInputSquare, *oGrad);

      Matrix::resizeOrCreate(negOnes_, 1, inputV->getHeight(), false, useGpu_);
      negOnes_->zeroMem();
      negOnes_->add(-1);
      tmpSum_->mul(*negOnes_, *sparseTmpInput, 1, 0);
[Review comment] The signature is mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT), which computes this = scaleAB*(a*b) + scaleT*this. Why can't lines 125-127 be: ones_->ones(); tmpSum_->mul(*ones_, *sparseTmpInput, -1, 0);
[Reply] Because when b is sparse, mul only supports scaleAB == 1, not other values (see Line 2944 in b28b2f1).
    } else {
      Matrix::resizeOrCreate(
          tmpInput_, inputV->getHeight(), inputV->getWidth(), false, useGpu_);

      tmpInput_->rowScale(0, *inputV, *oGrad);
      latentVectors_->getWGrad()->mul(
          *tmpInput_->getTranspose(), *inputMulFactor_, 1, 1);
      tmpInput_->rowScale(0, *inputSquare_, *oGrad);

      tmpSum_->sumCols(*tmpInput_, -1, 0);
    }

    latentVectors_->getWGrad()->addRowScale(
        0, *latentVectors_->getW(), *tmpSumTrans);

    /* Increment the gradient update count of the parameter */
    latentVectors_->getParameterPtr()->incUpdate(callback);
  }

  /* Calculate the gradient of the input layer */
  MatrixPtr inGrad = getInputGrad(0);
  if (inGrad != NULL) {
    inGrad->mul(
        *inputMulFactor_, *latentVectors_->getW()->getTranspose(), 1, 1);
    tmpSumTrans->sumRows(*latentVectorsSquare_, -1, 0);
    inGrad->addColScale(0, *inputV, *tmpSum_);
    inGrad->rowScale(0, *inGrad, *oGrad);
  }
}
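For reference, the backward pass implements the per-example gradients that follow from the identity above (here δ denotes the output gradient oGrad of the example):

\f[
\frac{\partial y}{\partial v_{i,f}} = \delta\Big(x_i \sum_{j=1}^{n} v_{j,f}\, x_j - v_{i,f}\, x_i^2\Big), \qquad
\frac{\partial y}{\partial x_i} = \delta\Big(\sum_{f=1}^{k} v_{i,f} \sum_{j=1}^{n} v_{j,f}\, x_j - x_i \sum_{f=1}^{k} v_{i,f}^2\Big)
\f]

The first expression maps to the rowScale/mul sequence accumulating into latentVectors_->getWGrad(), with the v_{i,f} x_i^2 term applied through the tmpSum_/addRowScale correction; the second maps to inGrad->mul with the transposed latent vectors plus the addColScale correction, with the final rowScale applying δ.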

}  // namespace paddle
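As a self-contained sanity check of the factored evaluation used in forward(), here is a minimal C++ sketch independent of Paddle (the names and toy data below are illustrative, not part of this PR); it compares the naive O(n^2 k) pairwise sum against the O(nk) form:

#include <cassert>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const int n = 8;  // number of input features
  const int k = 4;  // factor size
  // Deterministic toy data: x is one input row, v is the n x k latent matrix.
  std::vector<double> x(n), v(n * k);
  for (int i = 0; i < n; ++i) x[i] = 0.1 * (i + 1);
  for (int i = 0; i < n * k; ++i) v[i] = std::sin(0.3 * i);

  // Naive: y = sum_{i<j} <v_i, v_j> x_i x_j
  double naive = 0;
  for (int i = 0; i < n; ++i) {
    for (int j = i + 1; j < n; ++j) {
      double dot = 0;
      for (int f = 0; f < k; ++f) dot += v[i * k + f] * v[j * k + f];
      naive += dot * x[i] * x[j];
    }
  }

  // Factored: y = 0.5 * sum_f [ (sum_i v_{i,f} x_i)^2 - sum_i v_{i,f}^2 x_i^2 ]
  double factored = 0;
  for (int f = 0; f < k; ++f) {
    double s = 0, s2 = 0;
    for (int i = 0; i < n; ++i) {
      double t = v[i * k + f] * x[i];
      s += t;
      s2 += t * t;
    }
    factored += 0.5 * (s * s - s2);
  }

  std::printf("naive=%.12f factored=%.12f\n", naive, factored);
  assert(std::fabs(naive - factored) < 1e-9);
  return 0;
}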
FactorizationMachineLayer.h
@@ -0,0 +1,80 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "Layer.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/ThreadLocal.h"

namespace paddle {
/**
 * @brief The Factorization Machine models pairwise (order-2) feature
 * interactions as the inner product of the learned latent vectors
 * corresponding to each input feature.
 *
 * The Factorization Machine can effectively capture feature interactions,
 * especially when the input is sparse. While in principle FM can model
 * higher-order feature interactions, in practice usually only order-2
 * interactions are considered. The Factorization Machine Layer here only
 * computes the order-2 interactions with the formula:
 *
 * \f[
 *     y = \sum_{i=1}^{n-1}\sum_{j=i+1}^n\langle v_i, v_j \rangle x_i x_j
 * \f]
[Review comment] You can cite the reference paper here.
[Reply] Added.
 *
 * The detailed forward and backward calculations can be found in the paper:
 *
 *     Steffen Rendle. Factorization Machines. ICDM 2010.
 *
 * The config file api is factorization_machine.
 */
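For context (from the cited paper; not part of this layer), the full FM model adds a global bias and first-order terms to the pairwise term computed here, so a complete FM network would combine this layer with, e.g., a fully-connected layer for the linear part:

\f[
\hat{y}(x) = w_0 + \sum_{i=1}^{n} w_i x_i + \sum_{i=1}^{n-1}\sum_{j=i+1}^{n}\langle v_i, v_j\rangle x_i x_j
\f]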

class FactorizationMachineLayer : public Layer {
protected:
  // The latent vectors, shape: (size, factorSize_)
  // Each row of the latentVectors_ matrix is the latent vector
  // corresponding to one input feature dimension
  std::unique_ptr<Weight> latentVectors_;
  // The hyperparameter that defines the dimensionality of the factorization
  size_t factorSize_;

private:
  // Store the squared values of the latent vectors matrix
  MatrixPtr latentVectorsSquare_;
  // Store the squared values of the input matrix
  MatrixPtr inputSquare_;
  // The result of input matrix * latent vectors matrix that will be used in
  // both the forward and backward steps
  MatrixPtr inputMulFactor_;
  // Store temporary calculation results
  MatrixPtr tmpOut_;
  MatrixPtr tmpSum_;
  MatrixPtr tmpInput_;
  // A row vector filled with -1, used to compute a negated column sum,
  // since mul only supports scaleAB == 1 when the other operand is sparse
  MatrixPtr negOnes_;

public:
  explicit FactorizationMachineLayer(const LayerConfig& config)
      : Layer(config) {}
  ~FactorizationMachineLayer() {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;

  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback = nullptr) override;
};

}  // namespace paddle
test_LayerGrad.cpp
@@ -2464,6 +2464,25 @@ TEST(Layer, L2DistanceLayer) {
  }
}

void testFactorizationMachineLayer(InputType type, bool useGpu) {
  const int FACTOR_SIZE = 10;
  TestConfig config;
  config.layerConfig.set_type("factorization_machine");
  config.layerConfig.set_factor_size(FACTOR_SIZE);
  config.layerConfig.set_size(1);
  config.biasSize = 0;
  config.inputDefs.push_back({type, "layer_0", 128, 1280});
  config.layerConfig.add_inputs();
  testLayerGrad(config, "factorization_machine", 16, false, useGpu, false);
[Review comment] Please add a unit test with SparseMatrix as the input.
[Reply] Added.
}

TEST(Layer, FactorizationMachineLayer) {
  for (auto useGpu : {false, true}) {
    testFactorizationMachineLayer(INPUT_DATA, useGpu);
  }
  testFactorizationMachineLayer(INPUT_SPARSE_FLOAT_VALUE_DATA, false);
}

int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  initMain(argc, argv);
[Review comment] Don't do lines 35-40 in init; move them into forward.
[Reply] Changed.