From 904eefaf8a82ea10c0a804c58a11110fa296a74a Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Tue, 3 Jan 2017 15:51:07 +0800 Subject: [PATCH 01/11] add TensorShape use to represent tensor of any dimension. --- paddle/function/TensorType.h | 125 +++++++++++++++++++++++++++++ paddle/function/TensorTypeTest.cpp | 53 ++++++++++++ 2 files changed, 178 insertions(+) create mode 100644 paddle/function/TensorType.h create mode 100644 paddle/function/TensorTypeTest.cpp diff --git a/paddle/function/TensorType.h b/paddle/function/TensorType.h new file mode 100644 index 0000000000000..0b860f204606c --- /dev/null +++ b/paddle/function/TensorType.h @@ -0,0 +1,125 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include + +namespace paddle { + +enum ValueType { + VALUE_TYPE_INT32 = 0, + VALUE_TYPE_FLOAT = 1, + VALUE_TYPE_DOUBLE = 2, + VALUE_TYPE_BYTE = 3 +}; + +enum DeviceType { + DEVICE_TYPE_UNSPECIFIED = 0, + DEVICE_TYPE_CPU = 1, + DEVICE_TYPE_GPU = 2 +}; + +inline int sizeOfValuType(ValueType valueType) { + if (valueType == VALUE_TYPE_INT32) { + return 4; + } else if (valueType == VALUE_TYPE_FLOAT) { + return 4; + } else if (valueType == VALUE_TYPE_DOUBLE) { + return 8; + } else { + LOG(FATAL) << "Unknown type: " << valueType; + return 0; + } +} + +template +struct DataType; + +template <> +struct DataType { + static const ValueType value = VALUE_TYPE_FLOAT; +}; + +template <> +struct DataType { + static const ValueType value = VALUE_TYPE_DOUBLE; +}; + +/** + * TensorShape used to represent shape of normal tensor. + */ +class TensorShape { +public: + TensorShape() : ndims_(0), nelements_(0) { initDims(0); } + + TensorShape(size_t ndims) : ndims_(ndims), nelements_(1) { initDims(ndims); }; + + TensorShape(std::initializer_list dims) { + ndims_ = dims.size(); + initDims(ndims_); + std::copy(dims.begin(), dims.end(), dims_.begin()); + numElements(); + }; + + TensorShape(const TensorShape& t) + : ndims_(t.ndims_), nelements_(t.nelements_) { + initDims(ndims_); + std::copy(t.dims_.begin(), t.dims_.end(), dims_.begin()); + }; + + // get the size of specified dimension + size_t operator[](size_t dim) const { + CHECK_GE(dim, 0); + CHECK_LT(dim, ndims_); + return dims_[dim]; + } + + // set the size of specified dimension + void setDim(size_t dim, size_t size) { + CHECK_GE(dim, 0); + CHECK_LT(dim, ndims_); + dims_[dim] = size; + numElements(); + } + + // number of dimensions of the tensor + size_t ndims() const { return ndims_; } + + size_t getElements() const { return nelements_; } + +private: + // compute number of elements + void numElements() { + nelements_ = 1; + for (size_t n = 0; n < ndims_; n++) { + nelements_ *= dims_[n]; + } + } + + // init dims_ 
+ void initDims(size_t ndims) { + size_t count = ndims < 4 ? 4 : ndims; + dims_.assign(count, 1); + } + + // number of dimensions + // ndims_ may be not equeal dims_.size() + size_t ndims_; + // number of elements + size_t nelements_; + std::vector dims_; +}; + +} // namespace paddle diff --git a/paddle/function/TensorTypeTest.cpp b/paddle/function/TensorTypeTest.cpp new file mode 100644 index 0000000000000..99c25f42a1e6c --- /dev/null +++ b/paddle/function/TensorTypeTest.cpp @@ -0,0 +1,53 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "TensorType.h" +#include + +namespace paddle { + +TEST(TensorShape, Constructor) { + TensorShape t1; + EXPECT_EQ(t1.ndims(), 0); + EXPECT_EQ(t1.getElements(), 0); + + TensorShape t2(3); + EXPECT_EQ(t2.ndims(), 3); + EXPECT_EQ(t2.getElements(), 1); + + TensorShape t3({8, 10}); + EXPECT_EQ(t3.ndims(), 2); + EXPECT_EQ(t3.getElements(), 80); + + TensorShape t4(t3); + EXPECT_EQ(t4.ndims(), t3.ndims()); + EXPECT_EQ(t4.getElements(), t3.getElements()); + + TensorShape t5({1, 2, 3, 4, 5}); + EXPECT_EQ(t5.ndims(), 5); + EXPECT_EQ(t5.getElements(), 120); +} + +TEST(TensorShape, GetAndSet) { + TensorShape t({1, 2, 3}); + EXPECT_EQ(t.ndims(), 3); + EXPECT_EQ(t.getElements(), 6); + + EXPECT_EQ(t[1], 2); + t.setDim(1, 100); + EXPECT_EQ(t.getElements(), 300); + EXPECT_EQ(t[1], 100); +} + +} // namespace paddle From 0c4be7e6a687b5ec9a722fc1c9dbded70b1aa8ea Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 4 Jan 2017 16:51:49 +0800 Subject: [PATCH 02/11] add TensorType.h --- paddle/function/TensorShape.h | 97 +++++++++++++++++++++++++ paddle/function/TensorShapeTest.cpp | 53 ++++++++++++++ paddle/function/TensorType.h | 107 +++++++++++++--------------- paddle/function/TensorTypeTest.cpp | 52 ++++++-------- 4 files changed, 222 insertions(+), 87 deletions(-) create mode 100644 paddle/function/TensorShape.h create mode 100644 paddle/function/TensorShapeTest.cpp diff --git a/paddle/function/TensorShape.h b/paddle/function/TensorShape.h new file mode 100644 index 0000000000000..e70484a1afd99 --- /dev/null +++ b/paddle/function/TensorShape.h @@ -0,0 +1,97 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +namespace paddle { + +/** + * TensorShape used to represent shape of normal tensor. + */ +class TensorShape { +public: + TensorShape() : ndims_(0), nelements_(0) { initDims(0); } + + TensorShape(size_t ndims) : ndims_(ndims), nelements_(1) { initDims(ndims); }; + + TensorShape(std::initializer_list dims) { + ndims_ = dims.size(); + initDims(ndims_); + std::copy(dims.begin(), dims.end(), dims_.begin()); + numElements(); + }; + + TensorShape(const TensorShape& t) + : ndims_(t.ndims_), nelements_(t.nelements_) { + initDims(ndims_); + std::copy(t.dims_.begin(), t.dims_.end(), dims_.begin()); + }; + + // get the size of specified dimension + size_t operator[](size_t dim) const { + CHECK_GE(dim, 0); + CHECK_LT(dim, ndims_); + return dims_[dim]; + } + + // set the size of specified dimension + void setDim(size_t dim, size_t size) { + CHECK_GE(dim, 0); + CHECK_LT(dim, ndims_); + dims_[dim] = size; + numElements(); + } + + // number of dimensions of the tensor + size_t ndims() const { return ndims_; } + + size_t getElements() const { return nelements_; } + + bool operator==(const TensorShape& t) const { + if (ndims() != t.ndims()) return false; + for (size_t i = 0; i < ndims(); i++) { + if (dims_[i] != t.dims_[i]) return false; + } + + return true; + } + + bool operator!=(const TensorShape& t) const { return !(*this == t); } + +private: + // compute number of elements + void numElements() { + nelements_ = 1; + for (size_t n = 0; n < ndims_; n++) { + nelements_ *= dims_[n]; + } + } + + // init dims_ + void initDims(size_t ndims) { + size_t count = 
ndims < 4 ? 4 : ndims; + dims_.assign(count, 1); + } + + // number of dimensions + // ndims_ may be not equeal dims_.size() + size_t ndims_; + // number of elements + size_t nelements_; + std::vector dims_; +}; + +} // namespace paddle diff --git a/paddle/function/TensorShapeTest.cpp b/paddle/function/TensorShapeTest.cpp new file mode 100644 index 0000000000000..45a2e106e7fc3 --- /dev/null +++ b/paddle/function/TensorShapeTest.cpp @@ -0,0 +1,53 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "TensorShape.h" +#include + +namespace paddle { + +TEST(TensorShape, Constructor) { + TensorShape t1; + EXPECT_EQ(t1.ndims(), 0); + EXPECT_EQ(t1.getElements(), 0); + + TensorShape t2(3); + EXPECT_EQ(t2.ndims(), 3); + EXPECT_EQ(t2.getElements(), 1); + + TensorShape t3({8, 10}); + EXPECT_EQ(t3.ndims(), 2); + EXPECT_EQ(t3.getElements(), 80); + + TensorShape t4(t3); + EXPECT_EQ(t4.ndims(), t3.ndims()); + EXPECT_EQ(t4.getElements(), t3.getElements()); + + TensorShape t5({1, 2, 3, 4, 5}); + EXPECT_EQ(t5.ndims(), 5); + EXPECT_EQ(t5.getElements(), 120); +} + +TEST(TensorShape, GetAndSet) { + TensorShape t({1, 2, 3}); + EXPECT_EQ(t.ndims(), 3); + EXPECT_EQ(t.getElements(), 6); + + EXPECT_EQ(t[1], 2); + t.setDim(1, 100); + EXPECT_EQ(t.getElements(), 300); + EXPECT_EQ(t[1], 100); +} + +} // namespace paddle diff --git a/paddle/function/TensorType.h b/paddle/function/TensorType.h index 0b860f204606c..800f71a5b974c 100644 --- a/paddle/function/TensorType.h +++ b/paddle/function/TensorType.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include +#include "paddle/math/Matrix.h" namespace paddle { @@ -57,69 +57,60 @@ struct DataType { static const ValueType value = VALUE_TYPE_DOUBLE; }; -/** - * TensorShape used to represent shape of normal tensor. 
- */ -class TensorShape { -public: - TensorShape() : ndims_(0), nelements_(0) { initDims(0); } - - TensorShape(size_t ndims) : ndims_(ndims), nelements_(1) { initDims(ndims); }; - - TensorShape(std::initializer_list dims) { - ndims_ = dims.size(); - initDims(ndims_); - std::copy(dims.begin(), dims.end(), dims_.begin()); - numElements(); - }; - - TensorShape(const TensorShape& t) - : ndims_(t.ndims_), nelements_(t.nelements_) { - initDims(ndims_); - std::copy(t.dims_.begin(), t.dims_.end(), dims_.begin()); - }; - - // get the size of specified dimension - size_t operator[](size_t dim) const { - CHECK_GE(dim, 0); - CHECK_LT(dim, ndims_); - return dims_[dim]; - } +namespace detail { - // set the size of specified dimension - void setDim(size_t dim, size_t size) { - CHECK_GE(dim, 0); - CHECK_LT(dim, ndims_); - dims_[dim] = size; - numElements(); - } +template +struct MatrixT; - // number of dimensions of the tensor - size_t ndims() const { return ndims_; } +template <> +struct MatrixT { + using type = CpuMatrix; +}; - size_t getElements() const { return nelements_; } +template <> +struct MatrixT { + using type = GpuMatrix; +}; -private: - // compute number of elements - void numElements() { - nelements_ = 1; - for (size_t n = 0; n < ndims_; n++) { - nelements_ *= dims_[n]; - } - } +template <> +struct MatrixT { + using type = void; // Not implemented +}; - // init dims_ - void initDims(size_t ndims) { - size_t count = ndims < 4 ? 
4 : ndims; - dims_.assign(count, 1); - } +template <> +struct MatrixT { + using type = void; // Not implemented +}; + +template +struct VectorT; + +template <> +struct VectorT { + using type = CpuVector; +}; + +template <> +struct VectorT { + using type = GpuVector; +}; + +template <> +struct VectorT { + using type = CpuIVector; +}; + +template <> +struct VectorT { + using type = GpuIVector; +}; + +} // namespace detail - // number of dimensions - // ndims_ may be not equeal dims_.size() - size_t ndims_; - // number of elements - size_t nelements_; - std::vector dims_; +template +struct Tensor { + typedef typename detail::MatrixT::type Matrix; + typedef typename detail::VectorT::type Vector; }; } // namespace paddle diff --git a/paddle/function/TensorTypeTest.cpp b/paddle/function/TensorTypeTest.cpp index 99c25f42a1e6c..4a86245c2a288 100644 --- a/paddle/function/TensorTypeTest.cpp +++ b/paddle/function/TensorTypeTest.cpp @@ -17,37 +17,31 @@ limitations under the License. */ namespace paddle { -TEST(TensorShape, Constructor) { - TensorShape t1; - EXPECT_EQ(t1.ndims(), 0); - EXPECT_EQ(t1.getElements(), 0); - - TensorShape t2(3); - EXPECT_EQ(t2.ndims(), 3); - EXPECT_EQ(t2.getElements(), 1); - - TensorShape t3({8, 10}); - EXPECT_EQ(t3.ndims(), 2); - EXPECT_EQ(t3.getElements(), 80); - - TensorShape t4(t3); - EXPECT_EQ(t4.ndims(), t3.ndims()); - EXPECT_EQ(t4.getElements(), t3.getElements()); - - TensorShape t5({1, 2, 3, 4, 5}); - EXPECT_EQ(t5.ndims(), 5); - EXPECT_EQ(t5.getElements(), 120); +TEST(TensorType, Matrix) { + Tensor::Matrix matrix(100, 200); + EXPECT_EQ(matrix.getHeight(), 100); + EXPECT_EQ(matrix.getWidth(), 200); + EXPECT_EQ(matrix.getElementCnt(), 100 * 200); + EXPECT_EQ(matrix.useGpu(), false); + + Tensor::Matrix testGpu(100, 200); + EXPECT_EQ(testGpu.useGpu(), true); } -TEST(TensorShape, GetAndSet) { - TensorShape t({1, 2, 3}); - EXPECT_EQ(t.ndims(), 3); - EXPECT_EQ(t.getElements(), 6); - - EXPECT_EQ(t[1], 2); - t.setDim(1, 100); - 
EXPECT_EQ(t.getElements(), 300); - EXPECT_EQ(t[1], 100); +TEST(TensorType, Vector) { + Tensor::Vector cpuVector(100); + Tensor::Vector gpuVector(100); + EXPECT_EQ(cpuVector.useGpu(), false); + EXPECT_EQ(gpuVector.useGpu(), true); + EXPECT_EQ(cpuVector.getSize(), 100); + EXPECT_EQ(gpuVector.getSize(), 100); + + Tensor::Vector cpuIVector(100); + Tensor::Vector gpuIVector(100); + EXPECT_EQ(cpuIVector.useGpu(), false); + EXPECT_EQ(gpuIVector.useGpu(), true); + EXPECT_EQ(cpuIVector.getSize(), 100); + EXPECT_EQ(gpuIVector.getSize(), 100); } } // namespace paddle From c5c8051657611025eeaf8bc095da09a81fb76a1d Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 4 Jan 2017 21:17:56 +0800 Subject: [PATCH 03/11] add BufferArg --- paddle/function/BufferArg.cpp | 43 +++++ paddle/function/BufferArg.h | 260 ++++++++++++++++++++++++++++++ paddle/function/BufferArgTest.cpp | 128 +++++++++++++++ paddle/function/TensorType.h | 5 + 4 files changed, 436 insertions(+) create mode 100644 paddle/function/BufferArg.cpp create mode 100644 paddle/function/BufferArg.h create mode 100644 paddle/function/BufferArgTest.cpp diff --git a/paddle/function/BufferArg.cpp b/paddle/function/BufferArg.cpp new file mode 100644 index 0000000000000..08031917b21e1 --- /dev/null +++ b/paddle/function/BufferArg.cpp @@ -0,0 +1,43 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include + +#include "BufferArg.h" + +namespace paddle { + +const SequenceArg& BufferArg::sequence() const { + // CHECK_EQ(bufferType_, TENSOR_SEQUENCE_DATA); + return dynamic_cast(*this); +} + +const SparseMatrixArg& BufferArg::sparse() const { + // CHECK_EQ(bufferType_, TENSOR_SPARSE); + return dynamic_cast(*this); +} + +void BufferArgs::addArg(const Matrix& arg, const TensorShape& shape) { + args_.push_back(std::make_shared(arg, shape)); +} + +void BufferArgs::addArg(const CpuSparseMatrix& arg) { + args_.push_back(std::make_shared(arg)); +} + +void BufferArgs::addArg(const GpuSparseMatrix& arg) { + args_.push_back(std::make_shared(arg)); +} + +} // namespace paddle diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h new file mode 100644 index 0000000000000..9fcda7a878aad --- /dev/null +++ b/paddle/function/BufferArg.h @@ -0,0 +1,260 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include + +#include "TensorShape.h" +#include "TensorType.h" +#include "paddle/math/CpuSparseMatrix.h" +#include "paddle/math/Matrix.h" +#include "paddle/math/SparseMatrix.h" + +namespace paddle { + +enum BufferType { + TENSOR_NORMAL = 0, + TENSOR_SEQUENCE_ID = 1, + TENSOR_SEQUENCE_DATA = 2, + TENSOR_SPARSE = 3 +}; + +enum SparseDataType { + SPARSE_NO_VALUE = 0, // do not need value pointer, all values are 1 + SPARSE_FLOAT_VALUE = 1 +}; + +enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 }; + +/** + * BufferArg used as the argument type for Function. + */ +class BufferArg; +class SequenceArg; +class SparseMatrixArg; +typedef std::shared_ptr BufferArgPtr; + +class BufferArgs { +public: + BufferArgs() {} + size_t size() const { return args_.size(); } + + // add argument into BufferArgss + template + void addArg(const Tensor& arg) { + args_.push_back(std::make_shared(arg)); + } + + void addArg(const Matrix& arg, const TensorShape& shape); + + void addArg(const CpuSparseMatrix& arg); + void addArg(const GpuSparseMatrix& arg); + + // get argument + const BufferArg& operator[](size_t num) const { + CHECK_LT(num, args_.size()); + return *args_[num]; + } + +private: + std::vector args_; +}; + +// an array of arbitrary dimensions +class BufferArg { +public: + BufferArg(void* buf, ValueType valueType, const TensorShape& shape) + : buf_(buf), valueType_(valueType), shape_(shape) {} + + BufferArg(void* buf, ValueType valueType) + : buf_(buf), valueType_(valueType) {} + + BufferArg(const Matrix& matrix) + : buf_((void*)matrix.getData()), + valueType_(DataType::value), + shape_(2) { + shape_.setDim(0, matrix.getHeight()); + shape_.setDim(1, matrix.getWidth()); + } + + BufferArg(const Matrix& matrix, const TensorShape& shape) + : buf_((void*)matrix.getData()), + valueType_(DataType::value), + shape_(shape) { + CHECK_EQ(matrix.getElementCnt(), shape.getElements()); + } + + BufferArg(const Vector& vector) + : buf_((void*)vector.getData()), 
+ valueType_(DataType::value), + shape_(1) { + shape_.setDim(0, vector.getSize()); + } + + BufferArg(const IVector& vector) + : buf_((void*)vector.getData()), valueType_(VALUE_TYPE_INT32), shape_(1) { + shape_.setDim(0, vector.getSize()); + } + + template + typename Tensor::Matrix matrix() const { + CHECK(buf_); + CHECK(valueType_ == DataType::value); + // CHECK(deviceType_ == DType); + CHECK_EQ(2, shape_.ndims()); + return typename Tensor::Matrix( + reinterpret_cast(buf_), shape_[0], shape_[1]); + } + + template + typename Tensor::Vector vector() const { + CHECK(buf_); + CHECK(valueType_ == DataType::value); + // CHECK(deviceType_ == DType); + CHECK_EQ(1, shape_.ndims()); + return typename Tensor::Vector( + shape_[0], reinterpret_cast(buf_)); + } + + virtual ~BufferArg() {} + + template + T* data() const { + return reinterpret_cast(buf_); + } + + void* data() const { return buf_; } + ValueType valueType() const { return valueType_; } + BufferType bufferType() const { return bufferType_; } + const TensorShape& shape() const { return shape_; } + + const SequenceArg& sequence() const; + const SparseMatrixArg& sparse() const; + +protected: + void* buf_; + ValueType valueType_; + TensorShape shape_; + BufferType bufferType_; + // leading dimensions. 
The size is dims_.size() + // Dims lds_; +}; + +// sequence start positions in a mini-batch of sequences +// shape_.ndims() == 1 +// valueType_ = int32 +// if a < b than value_.buf_[a] < value_.buf_[b] +class SequenceIdArg : public BufferArg { +public: + SequenceIdArg(void* buf, const TensorShape& shape) + : BufferArg(buf, VALUE_TYPE_INT32, shape) { + CHECK_EQ(shape_.ndims(), 1); + numSeqs_ = shape_[0] - 1; + } + + SequenceIdArg(const IVector& vector) : BufferArg(vector) { + numSeqs_ = shape_[0] - 1; + } + + ~SequenceIdArg() {} + + size_t numSeqs() const { return numSeqs_; } + +private: + size_t numSeqs_; +}; + +// sequence data +class SequenceArg : public BufferArg { +public: + SequenceArg(void* buf, + ValueType valueType, + const TensorShape& shape, + const SequenceIdArg& startPositions) + : BufferArg(buf, valueType, shape), startPositions_(startPositions) {} + + SequenceArg(const Matrix& matrix, const IVector& vector) + : BufferArg(matrix), startPositions_(vector) {} + + ~SequenceArg() {} + + void* getIdBuf() const { return startPositions_.data(); } + size_t numSeqs() const { return startPositions_.numSeqs(); } + +private: + SequenceIdArg startPositions_; +}; + +// sparse matrix +// valueType_ == float or double +// shape_.ndims() == 2 +class SparseMatrixArg : public BufferArg { +public: + SparseMatrixArg(void* buf, + ValueType valueType, + const TensorShape& shape, + const BufferArg& row, + const BufferArg& col, + size_t nnz, + SparseDataFormat format, + SparseDataType type) + : BufferArg(buf, valueType, shape), + row_(row), + col_(col), + nnz_(nnz), + format_(format), + type_(type) { + CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE)); + CHECK_EQ(shape_.ndims(), 2); + CHECK_EQ(row_.shape().ndims(), 1); + CHECK_EQ(col_.shape().ndims(), 1); + if (format == SPARSE_CSR_FORMAT) { + CHECK_EQ(nnz, col.shape()[0]); + } else if (format == SPARSE_CSC_FORMAT) { + CHECK_EQ(nnz, row.shape()[0]); + } + } + + SparseMatrixArg(const CpuSparseMatrix& 
sparse) + : BufferArg(sparse), + row_((void*)sparse.getRows(), VALUE_TYPE_INT32), + col_((void*)sparse.getCols(), VALUE_TYPE_INT32) {} + + SparseMatrixArg(const GpuSparseMatrix& sparse) + : BufferArg(sparse), + row_((void*)sparse.getRows(), VALUE_TYPE_INT32), + col_((void*)sparse.getCols(), VALUE_TYPE_INT32) {} + + ~SparseMatrixArg() {} + + void* getRowBuf() const { return row_.data(); } + + void* getColBuf() const { return col_.data(); } + + size_t nnz() const { return nnz_; } + + SparseDataFormat dataFormat() const { return format_; } + + SparseDataType dataType() const { return type_; } + +private: + BufferArg row_; + BufferArg col_; + size_t nnz_; + SparseDataFormat format_; + SparseDataType type_; +}; + +} // namespace paddle diff --git a/paddle/function/BufferArgTest.cpp b/paddle/function/BufferArgTest.cpp new file mode 100644 index 0000000000000..5d669b8137e1a --- /dev/null +++ b/paddle/function/BufferArgTest.cpp @@ -0,0 +1,128 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "BufferArg.h" +#include +#include "paddle/math/MemoryHandle.h" + +namespace paddle { + +TEST(BufferTest, BufferArg) { + TensorShape shape({8, 10}); + CpuMemoryHandle memory(shape.getElements() * + sizeOfValuType(VALUE_TYPE_FLOAT)); + BufferArg buffer(memory.getBuf(), VALUE_TYPE_FLOAT, shape); + EXPECT_EQ(buffer.data(), memory.getBuf()); +} + +TEST(BufferTest, SequenceIdArg) { + TensorShape shape({10}); + CpuMemoryHandle memory(shape.getElements() * + sizeOfValuType(VALUE_TYPE_INT32)); + SequenceIdArg buffer(memory.getBuf(), shape); + EXPECT_EQ(buffer.data(), memory.getBuf()); + EXPECT_EQ(buffer.numSeqs(), 9); +} + +TEST(BufferTest, asArgument) { + MatrixPtr matrix = Matrix::create(100, 200); + VectorPtr vector = Vector::create(100, false); + CpuSparseMatrix sparse(200, 300, 50); + + // prepare arguments + BufferArgs argments; + argments.addArg(*matrix); + argments.addArg(*vector); + argments.addArg(sparse); + + // function + auto function = [=](const BufferArgs& inputs) { + EXPECT_EQ(inputs.size(), 3); + + // check inputs[0] + EXPECT_EQ(inputs[0].shape().ndims(), 2); + EXPECT_EQ(inputs[0].shape()[0], 100); + EXPECT_EQ(inputs[0].shape()[1], 200); + EXPECT_EQ(inputs[0].data(), matrix->getData()); + + EXPECT_EQ(inputs[0].matrix().getHeight(), + matrix->getHeight()); + EXPECT_EQ(inputs[0].matrix().getWidth(), + matrix->getWidth()); + EXPECT_EQ(inputs[0].matrix().getData(), matrix->getData()); + + // check inputs[1] + EXPECT_EQ(inputs[1].shape().ndims(), 1); + EXPECT_EQ(inputs[1].shape()[0], 100); + EXPECT_EQ(inputs[1].data(), vector->getData()); + CpuVector inVector = inputs[1].vector(); + EXPECT_EQ(inVector.getSize(), vector->getSize()); + EXPECT_EQ(inVector.getData(), vector->getData()); + + // check inputs[2] + EXPECT_EQ(inputs[2].shape().ndims(), 2); + EXPECT_EQ(inputs[2].shape()[0], 200); + EXPECT_EQ(inputs[2].shape()[1], 300); + EXPECT_EQ(inputs[2].data(), sparse.getData()); + // CHECK_EQ(inputs[2].sparse().nnz(), 50); + // 
CHECK_EQ(inputs[2].sparse().dataFormat(), SPARSE_CSR_FORMAT); + // CHECK_EQ(inputs[2].sparse().dataType(), SPARSE_FLOAT_VALUE); + EXPECT_EQ(inputs[2].sparse().getRowBuf(), sparse.getRows()); + EXPECT_EQ(inputs[2].sparse().getColBuf(), sparse.getCols()); + }; + + // call function + function(argments); +} + +template +void FunctionApi(typename Tensor::Matrix& output, + const typename Tensor::Matrix& input); + +template <> +void FunctionApi(CpuMatrix& output, const CpuMatrix& input) { + EXPECT_EQ(output.getHeight(), 100); + EXPECT_EQ(output.getWidth(), 200); +} + +template <> +void FunctionApi(GpuMatrix& output, const GpuMatrix& input) { + EXPECT_EQ(output.getHeight(), 10); + EXPECT_EQ(output.getWidth(), 20); +} + +template +void Function(const BufferArgs& arguments) { + auto input = arguments[0].matrix(); + auto output = arguments[1].matrix(); + FunctionApi(output, input); +} + +TEST(BufferTest, Function) { + CpuMatrix cpuInput = CpuMatrix(100, 200); + CpuMatrix cpuOutput = CpuMatrix(100, 200); + BufferArgs cpuArgments; + cpuArgments.addArg(cpuInput); + cpuArgments.addArg(cpuOutput); + Function(cpuArgments); + + GpuMatrix gpuInput = GpuMatrix(10, 20); + GpuMatrix gpuOutput = GpuMatrix(10, 20); + BufferArgs gpuArgments; + gpuArgments.addArg(gpuInput); + gpuArgments.addArg(gpuOutput); + Function(gpuArgments); +} + +} // namespace paddle diff --git a/paddle/function/TensorType.h b/paddle/function/TensorType.h index 800f71a5b974c..98942cff9e2ea 100644 --- a/paddle/function/TensorType.h +++ b/paddle/function/TensorType.h @@ -57,6 +57,11 @@ struct DataType { static const ValueType value = VALUE_TYPE_DOUBLE; }; +template <> +struct DataType { + static const ValueType value = VALUE_TYPE_INT32; +}; + namespace detail { template From 68156c88c50aff2c614ecc69b56bd5f814dc30be Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 5 Jan 2017 19:45:12 +0800 Subject: [PATCH 04/11] Modify the argument type of Function --- paddle/function/CrossMapNormalOp.cpp | 68 +++++++++---------- 
paddle/function/Function.h | 53 ++------------- paddle/gserver/layers/NormProjectionLayer.cpp | 30 +++++--- paddle/gserver/layers/NormProjectionLayer.h | 2 +- 4 files changed, 56 insertions(+), 97 deletions(-) diff --git a/paddle/function/CrossMapNormalOp.cpp b/paddle/function/CrossMapNormalOp.cpp index f13eb78d27d90..ec27db9c21296 100644 --- a/paddle/function/CrossMapNormalOp.cpp +++ b/paddle/function/CrossMapNormalOp.cpp @@ -125,27 +125,25 @@ class CrossMapNormalFunc : public FunctionBase { pow_ = config.get("pow"); } - void calc(const Arguments& inputs, - const Arguments& outputs, - const Arguments& inouts) override { + void calc(const BufferArgs& inputs, + const BufferArgs& outputs, + const BufferArgs& inouts) override { CHECK_EQ(1, inputs.size()); CHECK_EQ(2, outputs.size()); CHECK_EQ(0, inouts.size()); - CHECK_EQ(inputs[0].dims_.size(), 4); - for (size_t i = 0; i < inputs[0].dims_.size(); i++) { - CHECK_EQ(inputs[0].dims_[i], outputs[0].dims_[i]); - CHECK_EQ(inputs[0].dims_[i], outputs[1].dims_[i]); - } + CHECK_EQ(inputs[0].shape().ndims(), 4); + CHECK(inputs[0].shape() == outputs[0].shape()); + CHECK(inputs[0].shape() == outputs[1].shape()); - size_t samples = inputs[0].dims_[0]; - size_t channels = inputs[0].dims_[1]; - size_t height = inputs[0].dims_[2]; - size_t width = inputs[0].dims_[3]; + size_t samples = inputs[0].shape()[0]; + size_t channels = inputs[0].shape()[1]; + size_t height = inputs[0].shape()[2]; + size_t width = inputs[0].shape()[3]; - CrossMapNormal(outputs[0].getData(), - outputs[1].getData(), - inputs[0].getData(), + CrossMapNormal(outputs[0].data(), + outputs[1].data(), + inputs[0].data(), samples, channels, height, @@ -177,31 +175,29 @@ class CrossMapNormalGradFunc : public FunctionBase { pow_ = config.get("pow"); } - void calc(const Arguments& inputs, - const Arguments& outputs, - const Arguments& inouts) override { + void calc(const BufferArgs& inputs, + const BufferArgs& outputs, + const BufferArgs& inouts) override { CHECK_EQ(4, 
inputs.size()); CHECK_EQ(1, outputs.size()); CHECK_EQ(0, inouts.size()); - CHECK_EQ(inputs[0].dims_.size(), 4); - for (size_t i = 0; i < inputs[0].dims_.size(); i++) { - CHECK_EQ(inputs[0].dims_[i], inputs[1].dims_[i]); - CHECK_EQ(inputs[0].dims_[i], inputs[2].dims_[i]); - CHECK_EQ(inputs[0].dims_[i], inputs[3].dims_[i]); - CHECK_EQ(inputs[0].dims_[i], outputs[0].dims_[i]); - } - - size_t samples = inputs[0].dims_[0]; - size_t channels = inputs[0].dims_[1]; - size_t height = inputs[0].dims_[2]; - size_t width = inputs[0].dims_[3]; - - CrossMapNormalGrad(outputs[0].getData(), - inputs[0].getData(), - inputs[1].getData(), - inputs[2].getData(), - inputs[3].getData(), + CHECK_EQ(inputs[0].shape().ndims(), 4); + CHECK(inputs[0].shape() == inputs[1].shape()); + CHECK(inputs[0].shape() == inputs[2].shape()); + CHECK(inputs[0].shape() == inputs[3].shape()); + CHECK(inputs[0].shape() == outputs[0].shape()); + + size_t samples = inputs[0].shape()[0]; + size_t channels = inputs[0].shape()[1]; + size_t height = inputs[0].shape()[2]; + size_t width = inputs[0].shape()[3]; + + CrossMapNormalGrad(outputs[0].data(), + inputs[0].data(), + inputs[1].data(), + inputs[2].data(), + inputs[3].data(), samples, channels, height, diff --git a/paddle/function/Function.h b/paddle/function/Function.h index 9e8cbb8e48c30..024575b4f7bcd 100644 --- a/paddle/function/Function.h +++ b/paddle/function/Function.h @@ -16,57 +16,12 @@ limitations under the License. 
*/ #include #include +#include "BufferArg.h" #include "paddle/math/Matrix.h" #include "paddle/utils/ClassRegistrar.h" namespace paddle { -enum DeviceType { - DEVICE_TYPE_UNSPECIFIED = 0, - DEVICE_TYPE_CPU = 1, - DEVICE_TYPE_GPU = 2, -}; - -template -struct MatrixT; - -template <> -struct MatrixT { - using type = CpuMatrix; -}; - -template <> -struct MatrixT { - using type = GpuMatrix; -}; - -template -struct SequenceT; - -template <> -struct SequenceT { - using type = CpuIVector; -}; - -template <> -struct SequenceT { - using type = GpuIVector; -}; - -typedef std::vector Dims; - -class Tensor { -public: - Tensor(real* data, const Dims& dim) : buf_(data), dims_(dim) {} - - real* getData() const { return buf_; } - - real* buf_; - Dims dims_; -}; - -typedef std::vector Arguments; - class FuncConfig { public: union value { @@ -92,9 +47,9 @@ class FunctionBase { virtual void init(const FuncConfig& config) {} - virtual void calc(const Arguments& inputs, - const Arguments& outputs, - const Arguments& inouts) {} + virtual void calc(const BufferArgs& inputs, + const BufferArgs& outputs, + const BufferArgs& inouts) {} static ClassRegistrar funcRegistrar_; }; diff --git a/paddle/gserver/layers/NormProjectionLayer.cpp b/paddle/gserver/layers/NormProjectionLayer.cpp index 262d757c67e10..573de152fd0d5 100644 --- a/paddle/gserver/layers/NormProjectionLayer.cpp +++ b/paddle/gserver/layers/NormProjectionLayer.cpp @@ -71,11 +71,16 @@ void CMRProjectionNormLayer::forward(PassType passType) { Matrix::resizeOrCreate(denoms_, batchSize, size, /* trans */ false, useGpu_); - dims_ = {batchSize, channels_, imgSizeH_, imgSizeW_}; - forward_[0]->calc( - {Tensor(input->getData(), dims_)}, - {Tensor(outV->getData(), dims_), Tensor(denoms_->getData(), dims_)}, - {}); + shape_ = TensorShape({batchSize, channels_, imgSizeH_, imgSizeW_}); + + BufferArgs inputs; + BufferArgs outputs; + BufferArgs inouts; + inputs.addArg(*input, shape_); + outputs.addArg(*outV, shape_); + outputs.addArg(*denoms_, 
shape_); + + forward_[0]->calc(inputs, outputs, inouts); } void CMRProjectionNormLayer::backward(const UpdateCallback& callback) { @@ -90,11 +95,14 @@ void CMRProjectionNormLayer::backward(const UpdateCallback& callback) { MatrixPtr localOutV = getOutputValue(); MatrixPtr preOutV = inputLayers_[0]->getOutputValue(); - backward_[0]->calc({Tensor(preOutV->getData(), dims_), - Tensor(localOutV->getData(), dims_), - Tensor(localGrad->getData(), dims_), - Tensor(denoms_->getData(), dims_)}, - {Tensor(preOutGrad->getData(), dims_)}, - {}); + BufferArgs inputs; + BufferArgs outputs; + BufferArgs inouts; + inputs.addArg(*preOutV, shape_); + inputs.addArg(*localOutV, shape_); + inputs.addArg(*localGrad, shape_); + inputs.addArg(*denoms_, shape_); + outputs.addArg(*preOutGrad, shape_); + backward_[0]->calc(inputs, outputs, inouts); } } // namespace paddle diff --git a/paddle/gserver/layers/NormProjectionLayer.h b/paddle/gserver/layers/NormProjectionLayer.h index 6b2c5dde0d74d..2c0d8a3a718c4 100644 --- a/paddle/gserver/layers/NormProjectionLayer.h +++ b/paddle/gserver/layers/NormProjectionLayer.h @@ -41,6 +41,6 @@ class CMRProjectionNormLayer : public ResponseNormLayer { void backward(const UpdateCallback& callback = nullptr); protected: - Dims dims_; + TensorShape shape_; }; } // namespace paddle From 41c52d3b0ce619ba25ff9d681ef39613daa1c868 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 5 Jan 2017 20:33:09 +0800 Subject: [PATCH 05/11] Modify the argument type of ContextProjectionFunc --- paddle/function/CMakeLists.txt | 12 +- paddle/function/ContextProjectionOp.cpp | 161 +- paddle/function/ContextProjectionOp.h | 54 +- paddle/function/ContextProjectionOpGpu.cu | 44 +- paddle/function/TensorTypeTest.cpp | 17 + paddle/gserver/layers/ContextProjection.cpp | 42 +- paddle/math/Matrix.h | 4 + paddle/math/Matrix.h~RFbb8b484f.TMP | 1870 +++++++++++++++++++ 8 files changed, 2048 insertions(+), 156 deletions(-) create mode 100644 paddle/math/Matrix.h~RFbb8b484f.TMP diff --git 
a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 0b3126155d0c0..37c011549eca9 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -3,6 +3,7 @@ file(GLOB cpp_files . *Op.cpp) list(APPEND h_files Function.h) list(APPEND cpp_files Function.cpp) +list(APPEND cpp_files BufferArg.cpp) if(WITH_GPU) file(GLOB cu_files . *OpGpu.cu) @@ -16,10 +17,13 @@ if(WITH_TESTING) # TODO: # file(GLOB test_files . *OpTest.cpp) # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files}) - add_simple_unittest(CrossMapNormalOpTest) - add_unittest(ContextProjectionOpTest - ContextProjectionOpTest.cpp - ../gserver/tests/TestUtil.cpp) + # add_simple_unittest(CrossMapNormalOpTest) + add_simple_unittest(TensorShapeTest) + add_simple_unittest(TensorTypeTest) + add_simple_unittest(BufferArgTest) + # add_unittest(ContextProjectionOpTest + # ContextProjectionOpTest.cpp + # ../gserver/tests/TestUtil.cpp) endif() endif() diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index bd367a859e10c..1a483c47953b1 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -19,17 +19,15 @@ limitations under the License. 
*/ namespace paddle { template <> -void ContextProjectionForward(CpuMatrix* out_mat, - const CpuMatrix* input_mat, - const CpuMatrix* weight_mat, +void ContextProjectionForward(CpuMatrix& out_mat, + const CpuMatrix& input_mat, + const CpuMatrix& weight_mat, const CpuIVector& seq_vec, size_t context_length, int context_start, size_t begin_pad) { const int* starts = seq_vec.getData(); const size_t num_sequences = seq_vec.getSize() - 1; - auto w_mat = const_cast(weight_mat); - auto in_mat = const_cast(input_mat); for (size_t i = 0; i < num_sequences; ++i) { for (size_t j = 0; j < context_length; ++j) { int begin = starts[i] + context_start + j; @@ -39,10 +37,11 @@ void ContextProjectionForward(CpuMatrix* out_mat, if (begin < starts[i]) { int64_t pad_size = std::min(starts[i] - begin, starts[i + 1] - starts[i]); - MatrixPtr mat = out_mat->subMatrix(starts[i], pad_size); - if (w_mat) { - MatrixPtr sub = w_mat->subMatrix(j, pad_size); - mat->addAtOffset(*sub, j * in_mat->getWidth()); + MatrixPtr mat = out_mat.subMatrix(starts[i], pad_size); + if (weight_mat) { + MatrixPtr sub = + const_cast(weight_mat).subMatrix(j, pad_size); + mat->addAtOffset(*sub, j * input_mat.getWidth()); } dst_begin = starts[i] + pad_size; begin = starts[i]; @@ -50,19 +49,22 @@ void ContextProjectionForward(CpuMatrix* out_mat, if (end > starts[i + 1]) { int64_t pad_size = std::min(end - starts[i + 1], starts[i + 1] - starts[i]); - MatrixPtr mat = out_mat->subMatrix(starts[i + 1] - pad_size, pad_size); - if (w_mat) { - MatrixPtr sub = w_mat->subMatrix( - begin_pad + context_start + j - pad_size, pad_size); - mat->addAtOffset(*sub, j * in_mat->getWidth()); + MatrixPtr mat = out_mat.subMatrix(starts[i + 1] - pad_size, pad_size); + if (weight_mat) { + MatrixPtr sub = + const_cast(weight_mat) + .subMatrix(begin_pad + context_start + j - pad_size, + pad_size); + mat->addAtOffset(*sub, j * input_mat.getWidth()); } dst_end = starts[i + 1] - pad_size; end = starts[i + 1]; } if (end <= begin) continue; - 
MatrixPtr src = in_mat->subMatrix(begin, end - begin); - MatrixPtr dst = out_mat->subMatrix(dst_begin, dst_end - dst_begin); - dst->addAtOffset(*src, j * in_mat->getWidth()); + MatrixPtr src = + const_cast(input_mat).subMatrix(begin, end - begin); + MatrixPtr dst = out_mat.subMatrix(dst_begin, dst_end - dst_begin); + dst->addAtOffset(*src, j * input_mat.getWidth()); } } } @@ -82,40 +84,34 @@ class ContextProjectionForwardFunc : public FunctionBase { begin_pad_ = config.get("begin_pad"); } - void calc(const Arguments& inputs, - const Arguments& outputs, - const Arguments& inouts) override { + void calc(const BufferArgs& inputs, + const BufferArgs& outputs, + const BufferArgs& inouts) override { CHECK_EQ(3, inputs.size()); CHECK_EQ(1, outputs.size()); CHECK_EQ(0, inouts.size()); - CHECK(outputs[0].getData() && inputs[0].getData() && inputs[2].getData()); - CHECK_EQ(outputs[0].dims_.size(), 2); - CHECK_EQ(inputs[0].dims_.size(), 2); - CHECK_EQ(inputs[1].dims_.size(), 2); - CHECK_EQ(inputs[2].dims_.size(), 1); + CHECK(outputs[0].data() && inputs[0].data() && inputs[2].data()); + CHECK_EQ(outputs[0].shape().ndims(), 2); + CHECK_EQ(inputs[0].shape().ndims(), 2); + CHECK_EQ(inputs[1].shape().ndims(), 2); + CHECK_EQ(inputs[2].shape().ndims(), 1); /// dim of output = dim of input * context_length - CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_); + CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_); /// dim of input == dim of weight - CHECK_EQ(inputs[0].dims_[1], inputs[1].dims_[1]); + CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]); /// input and output has the same batch_size - CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]); - - auto out_mat = std::make_shared::type>( - outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); - const auto in_mat = std::make_shared::type>( - inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]); - const auto w_mat = - !inputs[1].getData() - ? 
nullptr - : std::make_shared::type>( - inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]); - typename SequenceT::type seq_vec( - inputs[2].dims_[0], reinterpret_cast(inputs[2].getData())); - - ContextProjectionForward(out_mat.get(), - in_mat.get(), - w_mat.get(), + CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]); + + auto out_mat = outputs[0].matrix(); + auto in_mat = inputs[0].matrix(); + auto w_mat = !inputs[1].data() + ? typename Tensor::Matrix(nullptr, 0, 0) + : inputs[1].matrix(); + auto seq_vec = inputs[2].vector(); + ContextProjectionForward(out_mat, + in_mat, + w_mat, seq_vec, context_length_, context_start_, @@ -129,18 +125,17 @@ class ContextProjectionForwardFunc : public FunctionBase { }; template <> -void ContextProjectionBackward(CpuMatrix* out_grad_mat, - CpuMatrix* in_grad_mat, - CpuMatrix* w_grad_mat, +void ContextProjectionBackward(CpuMatrix& out_grad_mat, + CpuMatrix& in_grad_mat, + CpuMatrix& w_grad_mat, const CpuIVector& seq_vec, size_t context_length, int context_start, size_t begin_pad, bool is_padding, size_t total_pad) { - CHECK(out_grad_mat); - size_t input_dim = in_grad_mat ? in_grad_mat->getWidth() - : w_grad_mat ? w_grad_mat->getWidth() : 0; + size_t input_dim = in_grad_mat ? in_grad_mat.getWidth() + : w_grad_mat ? 
w_grad_mat.getWidth() : 0; const int* starts = seq_vec.getData(); size_t num_sequences = seq_vec.getSize() - 1; for (size_t i = 0; i < num_sequences; ++i) { @@ -153,8 +148,8 @@ void ContextProjectionBackward(CpuMatrix* out_grad_mat, int64_t pad_size = std::min(starts[i] - begin, starts[i + 1] - starts[i]); if (is_padding && w_grad_mat) { - MatrixPtr mat = out_grad_mat->subMatrix(starts[i], pad_size); - MatrixPtr sub = w_grad_mat->subMatrix(j, pad_size); + MatrixPtr mat = out_grad_mat.subMatrix(starts[i], pad_size); + MatrixPtr sub = w_grad_mat.subMatrix(j, pad_size); sub->addAtOffset(*mat, j * input_dim); } dst_begin = starts[i] + pad_size; @@ -165,8 +160,8 @@ void ContextProjectionBackward(CpuMatrix* out_grad_mat, std::min(end - starts[i + 1], starts[i + 1] - starts[i]); if (is_padding && w_grad_mat) { MatrixPtr mat = - out_grad_mat->subMatrix(starts[i + 1] - pad_size, pad_size); - MatrixPtr sub = w_grad_mat->subMatrix( + out_grad_mat.subMatrix(starts[i + 1] - pad_size, pad_size); + MatrixPtr sub = w_grad_mat.subMatrix( begin_pad + context_start + j - pad_size, pad_size); sub->addAtOffset(*mat, j * input_dim); } @@ -175,8 +170,8 @@ void ContextProjectionBackward(CpuMatrix* out_grad_mat, } if (end <= begin) continue; if (!in_grad_mat) continue; - MatrixPtr src = in_grad_mat->subMatrix(begin, end - begin); - MatrixPtr dst = out_grad_mat->subMatrix(dst_begin, dst_end - dst_begin); + MatrixPtr src = in_grad_mat.subMatrix(begin, end - begin); + MatrixPtr dst = out_grad_mat.subMatrix(dst_begin, dst_end - dst_begin); src->addAtOffset(*dst, j * input_dim); } } @@ -199,44 +194,37 @@ class ContextProjectionBackwardFunc : public FunctionBase { total_pad_ = config.get("total_pad"); } - void calc(const Arguments& inputs, - const Arguments& outputs, - const Arguments& inouts) override { + void calc(const BufferArgs& inputs, + const BufferArgs& outputs, + const BufferArgs& inouts) override { CHECK_EQ(3, inputs.size()); CHECK_EQ(1, outputs.size()); CHECK_EQ(0, inouts.size()); - 
CHECK(outputs[0].getData() && inputs[2].getData()); - CHECK_EQ(outputs[0].dims_.size(), 2); - CHECK_EQ(inputs[0].dims_.size(), 2); - CHECK_EQ(inputs[1].dims_.size(), 2); - CHECK_EQ(inputs[2].dims_.size(), 1); + CHECK(outputs[0].data() && inputs[2].data()); + CHECK_EQ(outputs[0].shape().ndims(), 2); + CHECK_EQ(inputs[0].shape().ndims(), 2); + CHECK_EQ(inputs[1].shape().ndims(), 2); + CHECK_EQ(inputs[2].shape().ndims(), 1); /// dim of input == dim of weight - CHECK_EQ(inputs[0].dims_[1], inputs[1].dims_[1]); + CHECK_EQ(inputs[0].shape()[1], inputs[1].shape()[1]); /// input and output has the same batch_size - CHECK_EQ(inputs[0].dims_[0], outputs[0].dims_[0]); + CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]); /// dim of output = dim of input * context_length - CHECK_EQ(outputs[0].dims_[1], inputs[0].dims_[1] * context_length_); + CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_); - auto out_grad_mat = std::make_shared::type>( - outputs[0].getData(), outputs[0].dims_[0], outputs[0].dims_[1]); + auto out_grad_mat = outputs[0].matrix(); auto in_grad_mat = - !inputs[0].getData() - ? nullptr - : std::make_shared::type>( - inputs[0].getData(), inputs[0].dims_[0], inputs[0].dims_[1]); - auto w_grad_mat = - !inputs[1].getData() - ? nullptr - : std::make_shared::type>( - inputs[1].getData(), inputs[1].dims_[0], inputs[1].dims_[1]); - typename SequenceT::type seq_vec( - inputs[2].dims_[0], reinterpret_cast(inputs[2].getData())); - - ContextProjectionBackward(out_grad_mat.get(), - in_grad_mat ? in_grad_mat.get() : nullptr, - w_grad_mat ? w_grad_mat.get() : nullptr, + !inputs[0].data() ? typename Tensor::Matrix(nullptr, 0, 0) + : inputs[0].matrix(); + auto w_grad_mat = !inputs[1].data() + ? 
typename Tensor::Matrix(nullptr, 0, 0) + : inputs[1].matrix(); + auto seq_vec = inputs[2].vector(); + ContextProjectionBackward(out_grad_mat, + in_grad_mat, + w_grad_mat, seq_vec, context_length_, context_start_, @@ -253,6 +241,7 @@ class ContextProjectionBackwardFunc : public FunctionBase { size_t total_pad_; }; +#if 0 /** * \param inputs[0] input grad. * \param inputs[1] input sequence. @@ -272,6 +261,7 @@ class ContextProjectionBackwardDataFunc : public FunctionBase { CHECK_EQ(2, inputs.size()); CHECK_EQ(1, outputs.size()); CHECK_EQ(0, inouts.size()); + CHECK(inputs[0].getData() && outputs[0].getData() && inputs[1].getData()); CHECK_EQ(outputs[0].dims_.size(), 2); CHECK_EQ(inputs[0].dims_.size(), 2); @@ -349,6 +339,7 @@ class ContextProjectionBackwardWeightFunc : public FunctionBase { size_t begin_pad_; size_t total_pad_; }; +#endif REGISTER_TYPED_FUNC(ContextProjectionForward, CPU, @@ -363,6 +354,7 @@ REGISTER_TYPED_FUNC(ContextProjectionForward, REGISTER_TYPED_FUNC(ContextProjectionBackward, GPU, ContextProjectionBackwardFunc); +#if 0 REGISTER_TYPED_FUNC(ContextProjectionBackwardData, GPU, ContextProjectionBackwardDataFunc); @@ -370,4 +362,5 @@ REGISTER_TYPED_FUNC(ContextProjectionBackwardWeight, GPU, ContextProjectionBackwardWeightFunc); #endif +#endif } // namespace paddle diff --git a/paddle/function/ContextProjectionOp.h b/paddle/function/ContextProjectionOp.h index 93eb050fde35f..a558df5e072f2 100644 --- a/paddle/function/ContextProjectionOp.h +++ b/paddle/function/ContextProjectionOp.h @@ -31,14 +31,15 @@ namespace paddle { * \param[in] is_padding whether padding 0 or not. 
* */ -template -void ContextProjectionForward(typename MatrixT::type* output, - const typename MatrixT::type* input, - const typename MatrixT::type* weight, - const typename SequenceT::type& sequence, - size_t context_length, - int context_start, - size_t begin_pad); +template +void ContextProjectionForward( + typename Tensor::Matrix& output, + const typename Tensor::Matrix& input, + const typename Tensor::Matrix& weight, + const typename Tensor::Vector& sequence, + size_t context_length, + int context_start, + size_t begin_pad); /** * \brief Context Projection Backward. @@ -53,30 +54,31 @@ void ContextProjectionForward(typename MatrixT::type* output, * \param[in] is_padding whether padding 0 or not. * */ -template -void ContextProjectionBackward(typename MatrixT::type* out_grad, - typename MatrixT::type* in_grad, - typename MatrixT::type* w_grad, - const typename SequenceT::type& seq_vec, - size_t context_length, - int context_start, - size_t begin_pad, - bool is_padding, - size_t total_pad); +template +void ContextProjectionBackward( + typename Tensor::Matrix& out_grad, + typename Tensor::Matrix& in_grad, + typename Tensor::Matrix& w_grad, + const typename Tensor::Vector& seq_vec, + size_t context_length, + int context_start, + size_t begin_pad, + bool is_padding, + size_t total_pad); -template +template void ContextProjectionBackwardData( - typename MatrixT::type* out_grad, - typename MatrixT::type* in_grad, - const typename SequenceT::type& sequence, + typename Tensor::Matrix& out_grad, + typename Tensor::Matrix& in_grad, + const typename Tensor::Vector& sequence, size_t context_length, int context_start); -template +template void ContextProjectionBackwardWeight( - typename MatrixT::type* out_grad, - typename MatrixT::type* w_grad, - const typename SequenceT::type& seq_vec, + typename Tensor::Matrix& out_grad, + typename Tensor::Matrix& w_grad, + const typename Tensor::Vector& seq_vec, size_t context_length, int context_start, size_t total_pad, diff --git 
a/paddle/function/ContextProjectionOpGpu.cu b/paddle/function/ContextProjectionOpGpu.cu index 1ec7058f96c82..6a4a01a651041 100644 --- a/paddle/function/ContextProjectionOpGpu.cu +++ b/paddle/function/ContextProjectionOpGpu.cu @@ -120,20 +120,19 @@ void hl_context_projection_forward(const real* input, } template <> -void ContextProjectionForward(GpuMatrix* output, - const GpuMatrix* input, - const GpuMatrix* weight, +void ContextProjectionForward(GpuMatrix& output, + const GpuMatrix& input, + const GpuMatrix& weight, const GpuIVector& sequence, size_t context_length, int context_start, size_t begin_pad) { - CHECK(input && output); - hl_context_projection_forward(input->getData(), + hl_context_projection_forward(input.getData(), sequence.getData(), - weight ? weight->getData() : nullptr, - output->getData(), + weight ? weight.getData() : nullptr, + output.getData(), sequence.getSize() - 1, - input->getWidth(), + input.getWidth(), context_length, context_start, begin_pad); @@ -217,17 +216,16 @@ void hl_context_projection_backward_data(real* out_grad, } template <> -void ContextProjectionBackwardData(GpuMatrix* out_grad, - GpuMatrix* in_grad, +void ContextProjectionBackwardData(GpuMatrix& out_grad, + GpuMatrix& in_grad, const GpuIVector& sequence, size_t context_length, int context_start) { - CHECK(in_grad && out_grad); - hl_context_projection_backward_data(out_grad->getData(), + hl_context_projection_backward_data(out_grad.getData(), sequence.getData(), - in_grad->getData(), + in_grad.getData(), sequence.getSize() - 1, - in_grad->getWidth(), + in_grad.getWidth(), context_length, context_start); } @@ -348,19 +346,18 @@ void hl_context_projection_backward_weight(real* out_grad, template <> void ContextProjectionBackwardWeight( - GpuMatrix* out_grad, - GpuMatrix* w_grad, + GpuMatrix& out_grad, + GpuMatrix& w_grad, const GpuIVector& seq_vec, size_t context_length, int context_start, size_t total_pad, size_t begin_pad) { - CHECK(out_grad && w_grad); - 
hl_context_projection_backward_weight(out_grad->getData(), + hl_context_projection_backward_weight(out_grad.getData(), seq_vec.getData(), - w_grad->getData(), + w_grad.getData(), seq_vec.getSize() - 1, - w_grad->getWidth(), + w_grad.getWidth(), total_pad, context_length, context_start, @@ -368,16 +365,15 @@ void ContextProjectionBackwardWeight( } template <> -void ContextProjectionBackward(GpuMatrix* out_grad, - GpuMatrix* in_grad, - GpuMatrix* w_grad, +void ContextProjectionBackward(GpuMatrix& out_grad, + GpuMatrix& in_grad, + GpuMatrix& w_grad, const GpuIVector& sequence, size_t context_length, int context_start, size_t begin_pad, bool is_padding, size_t total_pad) { - CHECK(out_grad); if (in_grad) { ContextProjectionBackwardData( out_grad, diff --git a/paddle/function/TensorTypeTest.cpp b/paddle/function/TensorTypeTest.cpp index 4a86245c2a288..e50e46f3e9911 100644 --- a/paddle/function/TensorTypeTest.cpp +++ b/paddle/function/TensorTypeTest.cpp @@ -44,4 +44,21 @@ TEST(TensorType, Vector) { EXPECT_EQ(gpuIVector.getSize(), 100); } +TEST(TensorType, EmptyMatrix) { + CpuMatrix empty(nullptr, 0, 0); + CpuMatrix nonEmpty(10, 10); + EXPECT_EQ(empty.isEmpty(), true); + EXPECT_EQ(nonEmpty.isEmpty(), false); + CHECK(nonEmpty); + auto function = [](const CpuMatrix& matrix) { + if (matrix) { + EXPECT_NE(matrix.getData(), nullptr); + } else { + EXPECT_EQ(matrix.getData(), nullptr); + } + }; + function(empty); + function(nonEmpty); +} + } // namespace paddle diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp index e947b2b9ecbeb..26783a42cac42 100644 --- a/paddle/gserver/layers/ContextProjection.cpp +++ b/paddle/gserver/layers/ContextProjection.cpp @@ -110,7 +110,7 @@ void ContextProjection::forward() { size_t input_dim = in_->value->getWidth(); size_t dim = out_->value->getWidth(); CHECK_EQ(dim, input_dim * config_.context_length()); - size_t batch_size = in_->value->getHeight(); + // size_t batch_size = 
in_->value->getHeight(); CHECK_EQ(forward_.size(), 1) << "Only one forward function here"; REGISTER_TIMER_INFO("ContextProjectionForward", getName().c_str()); @@ -119,14 +119,17 @@ void ContextProjection::forward() { auto w_ptr = state_ ? state_.get() : is_padding ? weight_->getW().get() : nullptr; auto start_pos = in_->sequenceStartPositions; - forward_[0]->calc({Tensor(in_->value->getData(), Dims{batch_size, input_dim}), - Tensor(w_ptr ? w_ptr->getData() : nullptr, - Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}), - Tensor(reinterpret_cast( - const_cast(start_pos->getData(useGpu_))), - Dims{start_pos->getSize()})}, - {Tensor(out_->value->getData(), Dims{batch_size, dim})}, - {}); + + BufferArgs inputs; + BufferArgs outputs; + BufferArgs inouts; + inputs.addArg(*in_->value); + inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, + w_ptr ? w_ptr->getHeight() : 0, + input_dim)); + inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_)); + outputs.addArg(*out_->value); + forward_[0]->calc(inputs, outputs, inouts); if (state_ && config_.context_start() < 0) { CHECK_EQ(1, in_->getNumSequences()); @@ -160,15 +163,18 @@ void ContextProjection::backward(const UpdateCallback& callback) { bool is_padding = config_.trainable_padding(); auto start_pos = in_->sequenceStartPositions; auto w_ptr = is_padding ? weight_->getWGrad() : nullptr; - backward_[0]->calc({Tensor(in_->grad ? in_->grad->getData() : nullptr, - Dims{batch_size, input_dim}), - Tensor(w_ptr ? w_ptr->getData() : nullptr, - Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}), - Tensor(reinterpret_cast( - const_cast(start_pos->getData(useGpu_))), - Dims{start_pos->getSize()})}, - {Tensor(out_->grad->getData(), Dims{batch_size, dim})}, - {}); + + BufferArgs inputs; + BufferArgs outputs; + BufferArgs inouts; + inputs.addArg(CpuMatrix( + in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim)); + inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, + w_ptr ? 
w_ptr->getHeight() : 0, + input_dim)); + inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_)); + outputs.addArg(*out_->grad); + backward_[0]->calc(inputs, outputs, inouts); if (config_.trainable_padding()) { weight_->getParameterPtr()->incUpdate(callback); diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 4865a081a5aaa..60c6560396854 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -1091,6 +1091,10 @@ class Matrix : public BaseMatrix { TensorCpuApply(*this, expr); } } + + bool isEmpty() const { return data_ == nullptr; } + + explicit operator bool() const { return !isEmpty(); } }; inline std::ostream& operator<<(std::ostream& os, const Matrix& mat) { diff --git a/paddle/math/Matrix.h~RFbb8b484f.TMP b/paddle/math/Matrix.h~RFbb8b484f.TMP new file mode 100644 index 0000000000000..d89b0f67b3c98 --- /dev/null +++ b/paddle/math/Matrix.h~RFbb8b484f.TMP @@ -0,0 +1,1870 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include + +#include "paddle/utils/Logging.h" +#include "paddle/utils/ThreadLocal.h" + +#include + +#include "BaseMatrix.h" +#include "MemoryHandle.h" +#include "Vector.h" +#include "paddle/utils/ThreadLocal.h" +#include "paddle/utils/common.h" + +namespace paddle { + +enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 }; + +/** + * @brief matrix sparse_format . + * + * nnz represents nonzero number in sparse matrix. 
+ * + * SPARSE_CSR: row major matrix. length of row is height_ + 1, each element + * represents row start index in Matrix. length of col and value are nnz. + * + * SPARSE_CSC: col major matrix. length of col is width_ + 1, each element + * represents col start index in Matrix. length of col and value are nnz. + * + * @code + * for example: [0, 1, 0, 2, 0; + * 1, 0, 0, 0, 0; + * 0, 0, 0, 2, 5]; + * SPARSE_CSR row [0, 2, 3, 5]; + * col [1, 3, 0, 3, 4]; + * value [1, 2, 1, 2, 5] + * SPARSE_CSC col [0, 1, 2, 2, 4, 5]; + * row [1, 0, 0, 2, 2]; + * value [1, 1, 2, 2, 5] + * @endcode + */ +enum SparseFormat { SPARSE_CSR = 0, SPARSE_CSC = 1 }; + +class Matrix; +class GpuMatrix; +class CpuMatrix; +class CpuSparseMatrix; +class GpuSparseMatrix; +typedef std::shared_ptr MatrixPtr; +typedef std::shared_ptr GpuMatrixPtr; +typedef std::shared_ptr CpuMatrixPtr; +typedef std::shared_ptr GpuSparseMatrixPtr; +typedef std::shared_ptr CpuSparseMatrixPtr; + +/** + * Copy or assignemnt constructor will share the data as opposed to making a + * copy of the original data. To make a copy of the orinal data, use copyFrom() + * instead. 
+ */ +class Matrix : public BaseMatrix { +protected: + Matrix(MemoryHandlePtr memHandle, + size_t height, + size_t width, + bool trans, + bool use_gpu); + + Matrix(real* data, size_t height, size_t width, bool trans, bool use_gpu); + + Matrix(real* data, + size_t height, + size_t width, + size_t stride, + bool trans, + bool use_gpu); + + static ThreadLocal tmpMat_; + +public: + size_t elementCnt_; // maximal number of elements which can be held in data_ + MemoryHandlePtr memoryHandle_; + +public: + virtual ~Matrix() {} + + static MatrixPtr create(MemoryHandlePtr memHandle, + size_t height, + size_t width, + bool trans = false); + static MatrixPtr create(size_t height, + size_t width, + bool trans = false, + bool useGpu = false); + static MatrixPtr create(real* data, + size_t height, + size_t width, + bool trans = false, + bool useGpu = false); + static MatrixPtr create(real* data, + size_t height, + size_t width, + size_t stride, + bool trans = false, + bool useGpu = false); + + static MatrixPtr createSparseMatrix(size_t height, + size_t width, + size_t nnz, + SparseValueType valueType = FLOAT_VALUE, + bool trans = false, + bool useGpu = false); + static MatrixPtr createSparseMatrix(size_t height, + size_t width, + size_t nnz, + SparseValueType valueType = FLOAT_VALUE, + SparseFormat foramt = SPARSE_CSR, + bool trans = false, + bool useGpu = false); + + static MatrixPtr createSparseMatrix(real* data, + int* row, + int* col, + size_t height, + size_t width, + size_t nnz, /* used to allocate space */ + SparseValueType valueType, /*value type*/ + SparseFormat format, + bool trans, + bool useGpu); + + static void resizeOrCreateSparseMatrix( + MatrixPtr& matrix, + size_t height, + size_t width, + size_t nnz, + SparseValueType valueType = FLOAT_VALUE, + SparseFormat foramt = SPARSE_CSR, + bool trans = false, + bool useGpu = false); + + static void resizeOrCreate(MatrixPtr& a, + size_t height, + size_t width, + bool trans = false, + bool useGpu = false); + + /** + * 
@brief set the data buffer used to hold the matrix data. + * + * caller should make sure that the size of data is at least + * sizeof(real)*height*width. + */ + void setData(real* data) { + BaseMatrix::setData(data); + memoryHandle_.reset(); + } + + /// the data should be contiguous + void setData(real* data, size_t newHeight, size_t newWidth) { + setData(data); + height_ = newHeight; + width_ = newWidth; + elementCnt_ = newHeight * newWidth; + stride_ = width_; + } + + size_t getWidth() const { return width_; } + size_t getHeight() const { return height_; } + size_t getStride() const { return stride_; } + size_t getElementCnt() const { return elementCnt_; } + virtual real* getData() { return data_; } + virtual const real* getData() const { return data_; } + bool isTransposed() const { return trans_; } + bool isContiguous() const { return stride_ == width_ || height_ == 1; } + + // If sparse matrix, need to dynamic_cast to CpuSparseMatrix/GpuSparseMatrix + // before calling the following functions. + // These functions are declared in the base class just to make them easy to call. + // And these declarations should be moved to base class of sparse matrix + // if refactor sparse matrix + virtual int* getRows() const { + LOG(FATAL) << "Not implemented"; + return nullptr; //! suppress warning for no return value. + } + + virtual int* getCols() const { + LOG(FATAL) << "Not implemented"; + return nullptr; //! suppress warning for no return value. + } + + virtual SparseFormat getFormat() const { + LOG(FATAL) << "Not implemented"; + return SPARSE_CSR; //! suppress warning for no return value. + } + + virtual SparseValueType getValueType() const { + LOG(FATAL) << "Not implemented"; + return NO_VALUE; //! suppress warning for no return value. + } + + /** + * @brief matrix element-wise add + * + * Named add3 just because add/add2 has been used in BaseMatrix.cu + * and they are not virtual functions. 
+ */ + virtual void add3(MatrixPtr b) { LOG(FATAL) << "Not implemented"; } + + MemoryHandlePtr getMemoryHandle() const { return memoryHandle_; } + + virtual void zeroMem() { LOG(FATAL) << "Not implemented"; } + + virtual void resetOne() { LOG(FATAL) << "Not implemented"; } + + void setDiag(real value); + + virtual void copyFrom(const Matrix& src) { LOG(FATAL) << "Not implemented"; } + + virtual void trimFrom(const CpuSparseMatrix& src) { + LOG(FATAL) << "Not implemented"; + } + + // asynchronous copy + virtual void copyFrom(const Matrix& src, hl_stream_t stream) { + LOG(FATAL) << "Not implemented"; + } + + MatrixPtr subMatrix(size_t startRow, + size_t endRow, + size_t startCol, + size_t endCol); + + MatrixPtr subRowMatrix(size_t startRow, size_t endRow) { + return subMatrix(startRow, endRow, 0, getWidth()); + } + + MatrixPtr subColMatrix(size_t startCol, size_t endCol) { + return subMatrix(0, getHeight(), startCol, endCol); + } + + virtual MatrixPtr subMatrix(size_t startRow, size_t numRows) { + CHECK_LE(startRow + numRows, getHeight()); + return Matrix::create(getData() + startRow * getWidth(), + numRows, + getWidth(), + trans_, + useGpu_); + } + virtual MatrixPtr subMatrix(size_t startRow, size_t numRows, MatrixPtr dest) { + CHECK_LE(startRow + numRows, getHeight()); + CHECK_EQ(useGpu_, dest->useGpu_); + dest->setData(this->rowBuf(startRow), numRows, getWidth()); + return dest; + } + + /** + * If this is GpuMatrix, src is assumed to be CPU memory + * + * If this is CpuMatrix, src is assumed to be CPU memory + */ + virtual void copyFrom(const real* src, size_t size) { + LOG(FATAL) << "Not implemented"; + } + + virtual void copyFrom(const real* src, const int64_t* seq) { + LOG(FATAL) << "Not implemented"; + } + + /** + * @brief convert a int vector to a real matrix. + * + * (1) source and dest are both in CPU. + * + * (2) sizes are exactly match. 
+ */ + virtual void copyFrom(const IVector& src) { + LOG(FATAL) << "copy data from int vector only available on CpuMatrix."; + } + + virtual void copyByRowIndex(Matrix& b, const IVector& rowIndex) { + LOG(FATAL) << "Not implemented"; + } + + /** + * @brief Create a matrix with the same type (GpuMatrix, CpuMatrix, + * NonValueSparseMatrix, etc.) as this. + * + * If height and width is zero, the new matrix will have the same size + * as this, otherwise the new matrix will have the specified size. + * + */ + virtual MatrixPtr clone(size_t height = 0, + size_t width = 0, + bool useGpu = false) { + LOG(FATAL) << "Not implemented"; + return nullptr; + } + + virtual real* getRowBuf(size_t row) { + LOG(FATAL) << "Not implemented"; + return nullptr; + } + + virtual real getElement(size_t x, size_t y) const { + LOG(FATAL) << "Not implemented"; + return 0; + } + + virtual real getSum() { + LOG(FATAL) << "Not implemented"; + return 0; + } + + virtual void accumulateColSum(Matrix& src) { + LOG(FATAL) << "Not implemented"; + } + + virtual real getAbsSum() { + LOG(FATAL) << "Not implemented"; + return 0; + } + + /** + * @note Original data may not be preserved after resize(). + */ + virtual void resize(size_t newHeight, size_t newWidth) = 0; + + /** + * @note This should only be used for sparse matrix. + */ + virtual void resize(size_t newHeight, + size_t newWidth, + size_t newNnz, /* total item used to allocate space */ + SparseValueType valueType, + SparseFormat format) = 0; + + /** + * @brief This should only be used for sparse matrix. + * + * Currently must be called for each row in order. + * The matrix is not valid until setRow is called for the last row. + */ + virtual void setRow(size_t row, + size_t colNum, + const unsigned int* cols, + const real* values) = 0; + + virtual MatrixPtr getTranspose() = 0; + + /** + * @brief hard transpose. + * + * allocate matTrans' memory outside, then set memAlloc as false; + * else set as true. 
+ */ + virtual void transpose(MatrixPtr matTrans, bool memAlloc) { + LOG(FATAL) << "Not implemented"; + } + + virtual MatrixPtr getInverse() { + LOG(FATAL) << "Not implemented"; + return nullptr; + } + + /** + * @brief inverse. + * + * if allocate matInv's memory outside, then set memAlloc as false; + * else set as true. + */ + virtual void inverse(MatrixPtr matInv, bool memAlloc) { + LOG(FATAL) << "Not implemented"; + } + +public: + /// Only set all variables to 0 or NULL but not free them. + virtual void clear() { + height_ = 0; + width_ = 0; + data_ = NULL; + } + + void reshape(size_t height, size_t width); + + /// add b to each sample of this. + virtual void addBias(Matrix& b, real scale) { + LOG(FATAL) << "Not implemented"; + } + + virtual void addSharedBias(Matrix& b, real scale) { + LOG(FATAL) << "Not implemented"; + } + + void addBias(Matrix& b, real scale, bool sharedBias) { + if (!sharedBias) { + addBias(b, scale); + } else { + addSharedBias(b, scale); + } + } + + /// add each sample from a to this. + virtual void collectBias(Matrix& a, real scale) { + LOG(FATAL) << "Not implemented"; + } + + virtual void collectSharedBias(Matrix& a, real scale) { + LOG(FATAL) << "Not implemented"; + } + + void collectBias(Matrix& a, real scale, bool sharedBias) { + if (!sharedBias) { + collectBias(a, scale); + } else { + collectSharedBias(a, scale); + } + } + + virtual void sequenceAvgForward(Matrix& a, + const IVector& startsPos, + int mode) { + LOG(FATAL) << "Not implemented"; + } + + /** + * @code + * this = scaleAB*(a*b) + scaleT*this + * @endcode + */ + virtual void mul(const Matrix& a, + const Matrix& b, + real scaleAB, + real scaleT) { + LOG(FATAL) << "Not implemented"; + } + + /// Add a vector (column) b to matrix a, column by column. 
+ virtual void addColumnVector(const Matrix& b) { + LOG(FATAL) << "Not implemented"; + } + + /** + * @code + * For j < codeLength: + * this(i, j) += vec(index(i, j), 0) + * where index(i, j) = ((codes(i) + numClasses) >> (j + 1)) - 1 + * @endcode + */ + virtual void addByBitCode(size_t numClasses, + const IVector& codes, + const Matrix& vec) { + (void)numClasses; + (void)codes; + (void)vec; + LOG(FATAL) << "Not implemeted"; + } + + /** + * @code + * For j < codeLength: + * vec(index(i, j), 0) += this(i, j) + * where index is same as the index for addByBitCode + * @endcode + */ + virtual void addByBitCodeBackward(size_t numClasses, + const IVector& codes, + Matrix& vec) { + (void)numClasses; + (void)codes; + (void)vec; + LOG(FATAL) << "Not implemeted"; + } + + /** + * @code + * For j < codeLength: + * this(i, j) += dot(mat.row(index(i, j)), input.row(i)) + * where index is same as the index for addByBitCode + * @endcode + */ + virtual void mulByBitCode(size_t numClasses, + const IVector& codes, + const Matrix& mat, + const Matrix& input) { + (void)numClasses; + (void)codes; + (void)mat; + (void)input; + LOG(FATAL) << "Not implemeted"; + } + + /** + * @code + * For j < codeLength: + * mat.row(index(i, j)) += this(i, j) * input.row(i) + * where index is same as the index for addByBitCode + * @endcode + */ + virtual void mulByBitCodeBackwardWeight(size_t numClasses, + const IVector& codes, + Matrix& mat, + const Matrix& input) { + (void)numClasses; + (void)codes; + (void)mat; + (void)input; + LOG(FATAL) << "Not implemeted"; + } + + /** + * @code + * For j < codeLength: + * input.row(i) += this(i, j) * mat.row(index(i, j)) + * where index is same as the index for addByBitCode + * @endcode + */ + virtual void mulByBitCodeBackwardError(size_t numClasses, + const IVector& codes, + const Matrix& mat, + Matrix& input) { + (void)numClasses; + (void)codes; + (void)mat; + (void)input; + LOG(FATAL) << "Not implemeted"; + } + + /** + * @code + * For j < codeLength + * sum(i, 0) = scaleSum * \sum_j bit(i, j) * this(i, 
j) + * where bit(i, j) = ((codes(i) + numClasses) & 2^j) ? 1 : 0 + * @endcode + */ + virtual void sumByBitCode(size_t numClasses, + IVector& codes, + Matrix& sum, + real scaleSum) { + (void)numClasses; + (void)codes; + (void)sum; + (void)scaleSum; + LOG(FATAL) << "Not implemeted"; + } + + /** + * @code + * For j < codeLength + * this(i, j) -= bit(i, j) + * where bit(i, j) is same as that for sumByBitCode + * @endcode + */ + virtual void subByBitCode(size_t numClasses_, IVector& codes) { + (void)numClasses_; + (void)codes; + LOG(FATAL) << "Not implemeted"; + } + + /** + * add the sum of each row of this to mat + */ + virtual void rowSum(Matrix& sum) { + (void)sum; + LOG(FATAL) << "Not implemeted"; + } + + /** + * set the max of each row of this to mat + */ + virtual void rowMax(Matrix& max) { + (void)max; + LOG(FATAL) << "Not implemeted"; + } + + /** + * set the max of each column of this to mat + */ + virtual void colMax(Matrix& max) { LOG(FATAL) << "not implemented"; } + + /** + * @brief Get the top k elements of each column of this matrix. + * + * The row ids and values of these elements are stored in + * maxIds and max respectively. where k is the size of maxIds. + * And note that the top k elements are not sorted. + */ + virtual void colMax(IVector& maxIds, Matrix& maxVal) { + LOG(FATAL) << "not implemented"; + } + + virtual void maxoutForward(Matrix& a, + IVector& id, + size_t channels, + size_t groups) { + LOG(FATAL) << "not implemented"; + } + + virtual void maxoutBackward(Matrix& a, + IVector& id, + size_t channels, + size_t groups) { + LOG(FATAL) << "not implemented"; + } + + virtual void rowMaxId(IVector& maxIds) { LOG(FATAL) << "Not implemented"; } + + /** + * @brief Get the top k elements of each row of this matrix. + * + * The column ids and values of these elements are stored in + * maxIds and max respectively. where k is the size of maxIds. + * And note that the top k elements are not sorted. 
+ */ + virtual void rowMax(IVector& maxIds, Matrix& max) { + LOG(FATAL) << "Not implemented"; + } + + /// normalize each row so that the sum of each row is 1. + virtual void rowNormalizeL1(Matrix& out) { + (void)out; + LOG(FATAL) << "Not implemeted"; + } + + /** + * @code + * this = a*b + * @endcode + */ + virtual void mul(const Matrix& a, const Matrix& b) { + LOG(FATAL) << "Not implemented"; + } + + /** + * @code + * this = scaleAB*(this*b) + scaleT*this + * @endcode + */ + virtual void rightMul(Matrix& b, real scaleAB, real scaleT) { + LOG(FATAL) << "Not implemented"; + } + + /** + * @code + * this = this*b + * @endcode + */ + virtual void rightMul(Matrix& b) { LOG(FATAL) << "Not implemented"; } + + /** + * @code + * this = scaleAB*(a*this) + scaleT*this + * @endcode + */ + virtual void leftMul(Matrix& a, real scaleAB, real scaleT) { + LOG(FATAL) << "Not implemented"; + } + + /** + * @code + * this = a*this + * @endcode + */ + virtual void leftMul(Matrix& a) { LOG(FATAL) << "Not implemented"; } + + /// merge the element for each col. + virtual void colMerge(Matrix& src) { LOG(FATAL) << "Not implemented"; } + + /// copy -log(output[label]) to this->data[i]. + virtual void oneHotCrossEntropy(Matrix& output, IVector& label) { + LOG(FATAL) << "Not implemented"; + } + + /// calculate the error of outputV according to label. + virtual void oneHotCrossEntropyBp(Matrix& outputV, IVector& label) { + LOG(FATAL) << "Not implemented"; + } + + /// copy -log(output[label]) to this->data[i]. + virtual void oneHotCrossEntropyWithSelfNorm(Matrix& output, + IVector& label, + real alpha) { + LOG(FATAL) << "Not implemented"; + } + + /// calculate the error of outputV according to label. 
+ virtual void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, + IVector& label, + real alpha) { + LOG(FATAL) << "Not implemented"; + } + + /** + * \f[ + * a[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} b_{i+j} * c_{j} + * \f] + * + * b contains M elements, + * c contains N elements (N is odd), + * b's index arithmetic is computed modulo M, + * c's index arithmetic is computed modulo N. + */ + virtual void circularConv(Matrix& b, Matrix& c) { + LOG(FATAL) << "Not implemented"; + } + + virtual void circularConvDerivative(Matrix& output, + Matrix& prevOut1, + Matrix& prevOut2, + Matrix& prevGrad1, + Matrix& prevGrad2) { + LOG(FATAL) << "Not implemented"; + } + + /* output_ij = exp(this_{ij}) / (sum_j exp(this_ij)) */ + virtual void softmax(Matrix& output) { + (void)output; + LOG(FATAL) << "Not implemeted"; + } + virtual void sequenceSoftmax(Matrix& output, const IVector& index) { + (void)output; + LOG(FATAL) << "Not implemeted"; + } + + virtual void softmaxBackward(Matrix& outputV) { + (void)outputV; + LOG(FATAL) << "Not implemeted"; + } + + /* + sum_i = sum_j this_ij * output_ij + this_ij = output_ij* (this_ij - sum_i) + */ + virtual void softmaxDerivative(Matrix& output, Matrix& sftmaxSum) { + LOG(FATAL) << "Not implemented"; + } + + /// calculate the sum of squares diff cost. + virtual void sumOfSquares(Matrix& output, Matrix& label) { + LOG(FATAL) << "Not implemented"; + } + + /// gradient of sumOfSquares. 
+ virtual void sumOfSquaresBp(Matrix& outputV, Matrix& label) { + LOG(FATAL) << "Not implemented"; + } + + virtual void tanh(Matrix& output) { LOG(FATAL) << "Not implemented"; } + + virtual void tanhDerivative(Matrix& output) { + LOG(FATAL) << "Not implemented"; + } + + virtual void softrelu(Matrix& output) { LOG(FATAL) << "Not implemented"; } + + virtual void softreluDerivative(Matrix& output) { + LOG(FATAL) << "Not implemented"; + } + + virtual void scaledTanh(Matrix& output, real p1, real p2) { + LOG(FATAL) << "Not implemented"; + } + + /** + * cosine similarity, for each row i, + * this[i] = cos(output1[i], output2[i]) + * + * output2 can only have one row, then for each row i, + * this[i] = cos(output1[i], output2[0]) + */ + virtual void cosSim(Matrix& output1, Matrix& output2, real scale = 1.0f) { + LOG(FATAL) << "Not implemented"; + } + + virtual void cosSimDerivative(Matrix& output, + Matrix& prevOut1, + Matrix& prevOut2, + Matrix& prevGrad1, + Matrix& prevGrad2, + real scale = 1.0f) { + LOG(FATAL) << "Not implemented"; + } + + /// print out the values of elements to os + virtual void print(std::ostream& os) const { + LOG(FATAL) << "Not implemented"; + } + + /** + * print a part of the matrix + * from the (top,left) value to the (height, width) value (not included) + */ + virtual void print(std::ostream& os, size_t height, size_t width) const { + LOG(FATAL) << "Not implemented"; + } + + /// print one row to os + virtual void printOneRow(std::ostream& os, size_t idx) const { + LOG(FATAL) << "Not implemented"; + } + + virtual void check(std::ostream& os, Matrix& refMat, bool printDiff = true) {} + + virtual real getMin() { + LOG(FATAL) << "Not implemented"; + return 0; + } + virtual real getMax() { + LOG(FATAL) << "Not implemented"; + return 0; + } + + virtual void randomizeUniform() { LOG(FATAL) << "Not implemented"; } + + /** + * @brief calulate the error of classification + * + * output[i] = 1 if row i is an error. 
+ * + * output[i] = 0 if row i is correct. + */ + virtual void classificationError(Matrix& output, IVector& label) { + LOG(FATAL) << "Not implemented"; + } + + /** + * This function is used to calculate the convolution: + * + * It will expand a feature matrix according to the + * convolution filters + */ + virtual void convExpand(Matrix& feature, + int feaImgHeight, + int feaImgWidth, + int channels, + int blockH, + int blockW, + int strideH, + int strideW, + int paddingH, + int paddingW, + int outputH, + int outputW) { + LOG(FATAL) << "Not implemeted"; + } + + /** + * This function is the reverse implementation of convExpand: + * + * Its function is to restore a expanded-matrix into a feature matrix + */ + virtual void convShrink(Matrix& expandColMat, + int thisImgHeight, + int thisImgWidth, + int channels, + int blockH, + int blockW, + int strideH, + int strideW, + int paddingH, + int paddingW, + int outputH, + int outputW, + real alpha = 1.0f, + real beta = 0.0f) { + LOG(FATAL) << "Not implemeted"; + } + + /** + * Pooling forward operation, pick out the largest element + * in the sizeX of value + */ + virtual void maxPoolForward(Matrix& inputMat, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + /// Pooling backward operation. + virtual void maxPoolBackward(Matrix& image, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + /// Pooling forward operation, caculate the average of sizeX elements. 
+ virtual void avgPoolForward(Matrix& input, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void avgPoolBackward(Matrix& input, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingH, + size_t paddingW) { + LOG(FATAL) << "Not implemeted"; + } + + /** + * Input: one or more sequences. Each sequence contains some instances. + * + * Output: output size is the number of input sequences (NOT input + * instances). + * + * output[i] is set to max_input[i]. + */ + virtual void maxSequenceForward(Matrix& input, + const IVector& sequence, + IVector& index) { + LOG(FATAL) << "Not implemeted"; + } + + virtual void maxSequenceBackward(Matrix& outputGrad, + const IVector& sequence, + IVector& index) { + LOG(FATAL) << "Not implemeted"; + } + + /** + * @code + * this.row[i] += table.row[ids[i]] + * if ids[i] == -1, it will be ignored + * @endcode + */ + virtual void selectRows(Matrix& table, IVector& ids) { + (void)table; + (void)ids; + LOG(FATAL) << "Not implemented"; + } + + /** + * @code + * this[i] = table[i, id[i]] + * @endcode + */ + virtual void selectElements(Matrix& table, IVector& ids) { + LOG(FATAL) << "Not implemented"; + } + + /** + * @code + * table.row[ids[i]] += this.row[i] + * if ids[i] == -1, it will be ignored + * @endcode + */ + virtual void addToRows(Matrix& table, IVector& ids) { + (void)table; + (void)ids; + LOG(FATAL) << "Not implemented"; + } + + /** + * @code + * table[i, id[i]] += this[i] + * @endcode + */ + virtual void addElements(Matrix& table, IVector& ids) { + LOG(FATAL) << "Not implemented"; + } + /** + * @brief cross entropy for multi binary labels + * + * @code + * this[i] = 
-sum(label[i][j]*log(output[i][j]) + * + (1-label[i][j])*log(1-output[i][j])) + * @endcode + */ + virtual void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) { + LOG(FATAL) << "Not implemented"; + } + + /** + * @brief The gradient of cross entropy for multi binary labels on output + * + * @code + * this[i][j] = -label[i][j]/output[i][j] + * + (1-label[i][j])/(1-output[i][j]) + * @endcode + */ + virtual void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { + LOG(FATAL) << "Not implemented"; + } + + /** + * @brief Calculate the classification error for multi binary labels + * + * @code + * this[i] = sum((output[i][j] >= threshold && label[i][j] == 0) + * || (output[i][j] < threshold && label[i][j] == 1)) + * / output->getWidth() + * @endcode + */ + virtual void classificationErrorMulti(Matrix& output, + Matrix& label, + real threshold) { + LOG(FATAL) << "Not implemented"; + } + + virtual void paramReluForward(Matrix& data, Matrix& W) { + LOG(FATAL) << "Not implemented"; + } + virtual void paramReluBackwardW(Matrix& oGrad, Matrix& data) { + LOG(FATAL) << "Not implemented"; + } + virtual void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) { + LOG(FATAL) << "Not implemented"; + } + + virtual void bilinearForward(const Matrix& in, + const size_t inImgH, + const size_t inImgW, + const size_t outImgH, + const size_t outImgW, + const size_t numChannels, + const real ratioH, + const real ratioW) { + LOG(FATAL) << "Not implemented"; + } + virtual void bilinearBackward(const Matrix& out, + const size_t outImgH, + const size_t outImgW, + const size_t inImgH, + const size_t inImgW, + const size_t numChannels, + const real ratioH, + const real ratioW) { + LOG(FATAL) << "Not implemented"; + } + + template + void operator=(const ExpressionType& expr) { + if (useGpu_) { + TensorGpuApply(*this, expr); + } else { + TensorCpuApply(*this, expr); + } + } + + bool isEmpty() const { + return data_ == nullptr; + } + + explicit operator bool() const 
{ + return !isEmpty(); + } +}; + +inline std::ostream& operator<<(std::ostream& os, const Matrix& mat) { + mat.print(os); + return os; +} + +class GpuMatrix : public Matrix { +public: + GpuMatrix(); + + GpuMatrix(size_t height, size_t width, bool trans = false); + GpuMatrix(real* data, size_t height, size_t width, bool trans = false) + : Matrix(data, height, width, trans, true) {} + GpuMatrix(real* data, + size_t height, + size_t width, + size_t stride, + bool trans = false) + : Matrix(data, height, width, stride, trans, true) {} + GpuMatrix(GpuMemHandlePtr dataHandle, + size_t height, + size_t width, + bool trans = false) + : Matrix(dataHandle, height, width, trans, true) {} + ~GpuMatrix(); + + void zeroMem(); + void resetOne(); + void setDiag(real value); + + void resize(size_t newHeight, size_t newWidth); + void resize(size_t newHeight, + size_t newWidth, + size_t newNnz, /* used to allocate space */ + SparseValueType valueType, + SparseFormat format) { + LOG(FATAL) << "Only Support Sparse Matrix"; + } + void setRow(size_t row, + size_t colNum, + const unsigned int* cols, + const real* values) { + LOG(FATAL) << "Only Support Sparse Matrix"; + } + + /** + * Copy the data from cpu_memory buffer + */ + void copyFrom(const real* hostSrc, size_t size); + + void copyFrom(const real* hostSrc, const int64_t* seq); + + void copyFrom(const Matrix& src, hl_stream_t stream); + + void copyFrom(const Matrix& src); + + void copyFrom(const IVector& src); + + void copyByRowIndex(Matrix& b, const IVector& rowIndex); + + MatrixPtr clone(size_t height, size_t width, bool useGpu = false); + + real getElement(size_t x, size_t y) const; + + real* getRow(size_t row) { return BaseMatrix::rowBuf(row); } + virtual real* getRowBuf(size_t row) { return getRow(row); } + + real getSum(); + void accumulateColSum(Matrix& src); + real getAbsSum(); + + MatrixPtr getTranspose(); + void transpose(MatrixPtr matTrans, bool memAlloc); + + MatrixPtr getInverse(); + void inverse(MatrixPtr matInv, bool 
memAlloc); + + /// add b to each sample of this. + void addBias(Matrix& b, real scale); + void addSharedBias(Matrix& b, real scale); + + /** + * @code + * add each sample from a to this. + * @endcode + */ + void collectBias(Matrix& a, real scale); + void collectSharedBias(Matrix& a, real scale); + + void sequenceAvgForward(Matrix& a, const IVector& startsPos, int mode); + + /** + * @code + * this.row[i] += table.row[ids[i]] + * @endcode + */ + virtual void selectRows(Matrix& table, IVector& ids); + + /** + * @code + * this[i] = table[i, id[i]] + * @endcode + */ + virtual void selectElements(Matrix& table, IVector& ids); + + /** + * @code + * table.row[ids[i]] += this.row[i] + * @endcode + */ + virtual void addToRows(Matrix& table, IVector& ids); + + void addColumnVector(const Matrix& b); + + /** + * @code + * this = scaleAB*(a*b) + scaleT*this + * @endcode + */ + void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); + + /** + * @code + * this = a*b + * @endcode + */ + void mul(const Matrix& a, const Matrix& b); + + void mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB, real scaleT); + + void mul(const GpuSparseMatrix& a, + const GpuMatrix& b, + real scaleAB, + real scaleT); + + void mul(const GpuMatrix& a, + const GpuSparseMatrix& b, + real scaleAB, + real scaleT); + + /** + * @code + * this = scaleAB*(this*b) + scaleT*this + * @endcode + */ + void rightMul(Matrix& b, real scaleAB, real scaleT); + + /** + * @code + * this = this* b + * @endcode + */ + void rightMul(Matrix& b); + + /** + * @code + * this = scaleAB*(a*this) + scaleT*this + * @endcode + */ + void leftMul(Matrix& a, real scaleAB, real scaleT); + + /** + * @code + * this = a*this + * @endcode + */ + void leftMul(Matrix& a); + + void colMerge(Matrix& src); + void rowSum(Matrix& sum); + void rowMax(Matrix& max); + void rowMax(IVector& maxIds, Matrix& max); + void colMax(Matrix& max); + void colMax(IVector& maxIds, Matrix& max); + void maxoutForward(Matrix& a, IVector& id, size_t 
channels, size_t groups); + void maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t groups); + + void oneHotCrossEntropy(Matrix& output, IVector& label); + void oneHotCrossEntropyBp(Matrix& outputV, IVector& label); + void oneHotCrossEntropyWithSelfNorm(Matrix& output, + IVector& label, + real alpha); + void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, + IVector& label, + real alpha); + + void softmax(Matrix& output); + void sequenceSoftmax(Matrix& output, const IVector& index); + void softmaxBackward(Matrix& outputV); + void softmaxDerivative(Matrix& output, Matrix& sftmaxSum); + + /// calculate the sum of squares diff cost. + void sumOfSquares(Matrix& output, Matrix& label); + + /// gradient of sumOfSquares. + void sumOfSquaresBp(Matrix& outputV, Matrix& label); + void tanh(Matrix& output); + void tanhDerivative(Matrix& output); + void softrelu(Matrix& output); + void softreluDerivative(Matrix& output); + void scaledTanh(Matrix& output, real p1, real p2); + + void cosSim(Matrix& output1, Matrix& output2, real scale); + void cosSimDerivative(Matrix& output, + Matrix& prevOut1, + Matrix& prevOut2, + Matrix& prevGrad1, + Matrix& prevGrad2, + real scale); + + virtual void print(std::ostream& os) const; + virtual void print(std::ostream& os, size_t height, size_t width) const; + + void paramReluForward(Matrix& data, Matrix& W); + void paramReluBackwardW(Matrix& oGrad, Matrix& data); + void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W); + + void check(std::ostream& os, Matrix& refMat, bool printDiff = true); + void randomizeUniform(); + + void classificationError(Matrix& output, IVector& label); + + void convExpand(Matrix& feature, + int feaImgHeight, + int feaImgWidth, + int channels, + int blockH, + int blockW, + int strideH, + int strideW, + int paddingH, + int paddingW, + int outputH, + int outputW); + + void convShrink(Matrix& expandColMat, + int thisImgHeight, + int thisImgWidth, + int channels, + int blockH, + int blochW, + int 
strideH, + int strideW, + int paddingH, + int paddingWreal, + int outputH, + int outputW, + real alpha = 1.0f, + real beta = 0.0f); + + void maxPoolForward(Matrix& inputMat, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + size_t paddingH, + size_t paddingW); + + void maxPoolBackward(Matrix& image, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingH, + size_t paddingW); + + void avgPoolForward(Matrix& input, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + size_t paddingH, + size_t paddingW); + + void avgPoolBackward(Matrix& input, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingH, + size_t paddingW); + + void maxSequenceForward(Matrix& input, + const IVector& sequence, + IVector& index); + + void maxSequenceBackward(Matrix& outputGrad, + const IVector& sequence, + IVector& index); + + void bilinearForward(const Matrix& in, + const size_t inImgH, + const size_t inImgW, + const size_t outImgH, + const size_t outImgW, + const size_t numChannels, + const real ratioH, + const real ratioW); + + void bilinearBackward(const Matrix& out, + const size_t outImgH, + const size_t outImgW, + const size_t inImgH, + const size_t inImgW, + const size_t numChannels, + const real ratioH, + const real ratioW); + + void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); + + void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); + + template + void operator=(const ExpressionType& expr) { + TensorGpuApply(*this, 
expr); + } +}; + +class CpuMatrix : public Matrix { +public: + CpuMatrix(size_t height, size_t width, bool trans = false); + CpuMatrix(real* data, size_t height, size_t width, bool trans = false) + : Matrix(data, height, width, trans, false) {} + CpuMatrix(real* data, + size_t height, + size_t width, + size_t stride, + bool trans = false) + : Matrix(data, height, width, stride, trans, false) {} + + CpuMatrix(CpuMemHandlePtr dataHandle, + size_t height, + size_t width, + bool trans = false) + : Matrix(dataHandle, height, width, trans, false) {} + + ~CpuMatrix(); + + void zeroMem(); + void resetOne(); + void setDiag(real value); + + void resize(size_t newHeight, size_t newWidth); + void resize(size_t newHeight, + size_t newWidth, + size_t newNnz, /* used to allocate space */ + SparseValueType valueType, + SparseFormat format) { + LOG(FATAL) << "Only Support Sparse Matrix"; + } + void setRow(size_t row, + size_t colNum, + const unsigned int* cols, + const real* values) { + LOG(FATAL) << "Only Support Sparse Matrix"; + } + + real getElement(size_t x, size_t y) const; + real getSum(); + void accumulateColSum(Matrix& src); + real getAbsSum(); + + MatrixPtr getTranspose(); + void transpose(MatrixPtr matTrans, bool memAlloc); + + MatrixPtr getInverse(); + void inverse(MatrixPtr matInv, bool memAlloc); + + void copyFrom(const Matrix& src); + + void copyFrom(const Matrix& src, hl_stream_t stream); + + void copyFrom(const real* cpuSrc, size_t size); + + void copyFrom(const real* cpuSrc, const int64_t* seq); + + void copyFrom(const IVector& src); + + void copyFrom(CpuSparseMatrix& src); + + void copyByRowIndex(Matrix& b, const IVector& rowIndex); + + MatrixPtr clone(size_t height, size_t width, bool useGpu = false); + + void convExpand(Matrix& feature, + int feaImgHeight, + int feaImgWidth, + int channels, + int blcokH, + int blockW, + int strideH, + int strideW, + int paddingH, + int paddingW, + int outputH, + int outputW); + + void convShrink(Matrix& expandFeat, + int 
thisImgHeight, + int thisImgWidth, + int channels, + int blockH, + int blockW, + int strideH, + int strideW, + int paddingH, + int paddingW, + int outputH, + int outputW, + real alpha = 1.0f, + real beta = 0.0f); + + void maxPoolForward(Matrix& inputMat, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + size_t paddingH, + size_t paddingW); + + void maxPoolBackward(Matrix& image, + size_t imgSizeH, + size_t imgSizeW, + Matrix& outGrad, + Matrix& outV, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingH, + size_t paddingW); + + void avgPoolForward(Matrix& input, + size_t imgSizeH, + size_t imgSizeW, + size_t channels, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + size_t paddingH, + size_t paddingW); + + void avgPoolBackward(Matrix& input, + size_t imgSizeH, + size_t imgSizeW, + size_t sizeX, + size_t sizeY, + size_t strideH, + size_t strideW, + size_t outputH, + size_t outputW, + real scaleTargets, + real scaleOutput, + size_t paddingH, + size_t paddingW); + + void maxSequenceForward(Matrix& input, + const IVector& sequence, + IVector& index); + + void maxSequenceBackward(Matrix& outputGrad, + const IVector& sequence, + IVector& index); + + real* getRow(size_t row) { return BaseMatrix::rowBuf(row); } + virtual real* getRowBuf(size_t row) { return getRow(row); } + +public: + /// add b to each sample of this. + void addBias(Matrix& b, real scale); + void addSharedBias(Matrix& b, real scale); + + /// add each sample of a to this. 
+ void collectBias(Matrix& a, real scale); + void collectSharedBias(Matrix& a, real scale); + + void sequenceAvgForward(Matrix& a, const IVector& startsPos, int mode); + + /** + * @code + * this.row[i] += table.row[ids[i]] + * @endcode + */ + virtual void selectRows(Matrix& table, IVector& ids); + + /** + * @code + * table.row[ids[i]] += this.row[i] + * @endcode + */ + virtual void addToRows(Matrix& table, IVector& ids); + + /** + * @code + * this[i] = table[i, id[i]] + * @endcode + */ + virtual void selectElements(Matrix& table, IVector& ids); + + /** + * @code + * table[i, id[i]] += this[i] + * @endcode + */ + virtual void addElements(Matrix& table, IVector& ids); + + /** + * use abstract getRow() to get row from table. + * + * Define table as template instead of virtual class for performance sake. + * internal used by above two virtual funcs. + */ + template + void selectRowsImp(TableMatType& table, IVector& ids); + template + void addToRowsImp(TableMatType& table, IVector& ids); + + void addColumnVector(const Matrix& b); + + void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); + void mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); + + void mul(CpuMatrix* a, CpuSparseMatrix* b, real scaleAB, real scaleT); + + static void mul(CpuMatrix* a, + CpuMatrix* b, + CpuSparseMatrix* c, + real scaleAB, + real scaleT); + + /** + * c = a * b + * + * use abstract getRow() to get row from B,C. + * Define B,C as template instead of virtual class for performance sake. 
+ */ + template + static void mul( + CpuSparseMatrix* a, MatBType* b, MatCType* c, real scaleAB, real scaleT); + + virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); + + void mul(const Matrix& a, const Matrix& b); + + void rightMul(Matrix& b, real scaleAB, real scaleT); + void rightMul(Matrix& b); + + void leftMul(Matrix& a, real scaleAB, real scaleT); + void leftMul(Matrix& a); + void colMerge(Matrix& src); + void rowSum(Matrix& sum); + void rowMaxId(IVector& maxIds); + void rowMax(Matrix& max); + void rowMax(IVector& maxIds, Matrix& maxVal); + void colMax(Matrix& max); + void colMax(IVector& maxIds, Matrix& maxVal); + void maxoutForward(Matrix& a, IVector& id, size_t channels, size_t groups); + void maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t groups); + void rowNormalizeL1(Matrix& out); + + void oneHotCrossEntropy(Matrix& output, IVector& label); + void oneHotCrossEntropyBp(Matrix& outputV, IVector& label); + void oneHotCrossEntropyWithSelfNorm(Matrix& output, + IVector& label, + real alpha); + void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, + IVector& label, + real alpha); + + void circularConv(Matrix& b, Matrix& c); + void circularConvDerivative(Matrix& output, + Matrix& prevOut1, + Matrix& prevOut2, + Matrix& prevGrad1, + Matrix& prevGrad2); + + void softmax(Matrix& output); + void sequenceSoftmax(Matrix& output, const IVector& index); + void softmaxDerivative(Matrix& output, Matrix& sftmaxSum); + + /// calculate the sum of squares diff cost. + void sumOfSquares(Matrix& output, Matrix& label); + + /// gradient of sumOfSquares. 
+ void sumOfSquaresBp(Matrix& outputV, Matrix& label); + + void tanh(Matrix& output); + void tanhDerivative(Matrix& output); + + void softrelu(Matrix& output); + void softreluDerivative(Matrix& output); + void scaledTanh(Matrix& output, real p1, real p2); + + void cosSim(Matrix& output1, Matrix& output2, real scale); + void cosSimDerivative(Matrix& output, + Matrix& prevOut1, + Matrix& prevOut2, + Matrix& prevGrad1, + Matrix& prevGrad2, + real scale); + + void print(std::ostream& os) const; + void print(std::ostream& os, size_t height, size_t width) const; + void printOneRow(std::ostream& os, size_t idx) const; + + void paramReluForward(Matrix& data, Matrix& W); + void paramReluBackwardW(Matrix& oGrad, Matrix& data); + void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W); + + void check(std::ostream& os, Matrix& refMat, bool printDiff = true); + + real getMin(); + real getMax(); + + void randomizeUniform(); + + void classificationError(Matrix& output, IVector& label); + + void addByBitCode(size_t numClasses, const IVector& codes, const Matrix& vec); + + void addByBitCodeBackward(size_t numClasses, + const IVector& codes, + Matrix& vec); + + void mulByBitCode(size_t numClasses, + const IVector& codes, + const Matrix& mat, + const Matrix& input); + + void mulByBitCodeBackwardWeight(size_t numClasses, + const IVector& codes, + Matrix& mat, + const Matrix& input); + + void mulByBitCodeBackwardError(size_t numClasses, + const IVector& codes, + const Matrix& mat, + Matrix& input); + + void sumByBitCode(size_t numClasses, + IVector& codes, + Matrix& sum, + real scaleSum); + + void subByBitCode(size_t numClasses_, IVector& codes); + + void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); + void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); + void classificationErrorMulti(Matrix& output, Matrix& label, real threshold); + + void bilinearForward(const Matrix& in, + const size_t inImgH, + const size_t inImgW, + const size_t outImgH, + 
const size_t outImgW, + const size_t numChannels, + const real ratioH, + const real ratioW); + + void bilinearBackward(const Matrix& out, + const size_t outImgH, + const size_t outImgW, + const size_t inImgH, + const size_t inImgW, + const size_t numChannels, + const real ratioH, + const real ratioW); + + template + void operator=(const ExpressionType& expr) { + TensorCpuApply(*this, expr); + } +}; + +class SharedCpuMatrix : public CpuMatrix { +public: + /* blockNum is number of partitions of the matrix */ + SharedCpuMatrix(int blockNum, size_t height, size_t width, bool trans = false) + : CpuMatrix(height, width, trans) { + initShared(blockNum); + } + SharedCpuMatrix( + int blockNum, real* data, size_t height, size_t width, bool trans = false) + : CpuMatrix(data, height, width, trans) { + initShared(blockNum); + } + + SharedCpuMatrix(int blockNum, + CpuMemHandlePtr dataHandle, + size_t height, + size_t width, + bool trans = false) + : CpuMatrix(dataHandle, height, width, trans) { + initShared(blockNum); + } + + SharedCpuMatrix(CpuMemHandlePtr dataHandle, + size_t height, + size_t width, + bool trans = false) + : CpuMatrix(dataHandle, height, width, trans) { + initBlock(1); + } + + ~SharedCpuMatrix() {} + +public: + virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); + virtual void add(Matrix& b, real p1, real p2); + virtual void add(real p1, real p2); + +private: + using Matrix::mul; + void initShared(int blockNum); + void initBlock(int blockNum); + + int blockNum_; + std::vector> blockLocks_; + ThreadLocal localBuf_; + ThreadLocal> localBufRows_; + ThreadLocal> blockSeq_; +}; + +typedef struct { unsigned int col; } sparse_non_value_t; + +typedef struct { + unsigned int col; + float value; +} sparse_float_value_t; + +} // namespace paddle +#include "ExecViaCpu.h" From f3fdfd941f170fbcfa5162246803b4cf8be6131c Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 5 Jan 2017 21:32:55 +0800 Subject: [PATCH 06/11] add some comments for 
Function.h --- paddle/function/BufferArg.h | 26 ----------------------- paddle/function/Function.h | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index 9fcda7a878aad..52494afed3b85 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -46,32 +46,6 @@ class SequenceArg; class SparseMatrixArg; typedef std::shared_ptr BufferArgPtr; -class BufferArgs { -public: - BufferArgs() {} - size_t size() const { return args_.size(); } - - // add argument into BufferArgss - template - void addArg(const Tensor& arg) { - args_.push_back(std::make_shared(arg)); - } - - void addArg(const Matrix& arg, const TensorShape& shape); - - void addArg(const CpuSparseMatrix& arg); - void addArg(const GpuSparseMatrix& arg); - - // get argument - const BufferArg& operator[](size_t num) const { - CHECK_LT(num, args_.size()); - return *args_[num]; - } - -private: - std::vector args_; -}; - // an array of arbitrary dimensions class BufferArg { public: diff --git a/paddle/function/Function.h b/paddle/function/Function.h index 024575b4f7bcd..27ebe808aaf44 100644 --- a/paddle/function/Function.h +++ b/paddle/function/Function.h @@ -22,6 +22,11 @@ limitations under the License. */ namespace paddle { +/** + * Function Configuration. + * The argument type of Function::init. + * Follow-up will consider moving this data structure to Proto inside. + */ class FuncConfig { public: union value { @@ -41,6 +46,43 @@ class FuncConfig { std::map valueMap_; }; +/** + * Argument type for Function::calc(). + * A BufferArgs contains a set of BufferArg, + * because Function can have multiple inputs, outputs and inouts. 
+ */ +class BufferArgs { +public: + BufferArgs() {} + size_t size() const { return args_.size(); } + + // add argument into BufferArgss + template + void addArg(const Tensor& arg) { + args_.push_back(std::make_shared(arg)); + } + + void addArg(const Matrix& arg, const TensorShape& shape); + + void addArg(const CpuSparseMatrix& arg); + void addArg(const GpuSparseMatrix& arg); + + // get argument + const BufferArg& operator[](size_t num) const { + CHECK_LT(num, args_.size()); + return *args_[num]; + } + +private: + std::vector args_; +}; + +/** + * Base class for Function. + * The basic Function implementation requires override init and calc interfaces. + * Need to pay attention to the inouts argument. For the input argument + * that will be modified, it needs to be passed through inouts. + */ class FunctionBase { public: virtual ~FunctionBase() {} From ccf0b1bb2e32e0b52b56af18c1c8e339eface97e Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Thu, 5 Jan 2017 21:45:00 +0800 Subject: [PATCH 07/11] add FunctionTest.cpp --- paddle/function/BufferArg.cpp | 12 ------- paddle/function/BufferArgTest.cpp | 40 +-------------------- paddle/function/CMakeLists.txt | 1 + paddle/function/Function.cpp | 12 +++++++ paddle/function/FunctionTest.cpp | 59 +++++++++++++++++++++++++++++++ 5 files changed, 73 insertions(+), 51 deletions(-) create mode 100644 paddle/function/FunctionTest.cpp diff --git a/paddle/function/BufferArg.cpp b/paddle/function/BufferArg.cpp index 08031917b21e1..65c6f303041d8 100644 --- a/paddle/function/BufferArg.cpp +++ b/paddle/function/BufferArg.cpp @@ -28,16 +28,4 @@ const SparseMatrixArg& BufferArg::sparse() const { return dynamic_cast(*this); } -void BufferArgs::addArg(const Matrix& arg, const TensorShape& shape) { - args_.push_back(std::make_shared(arg, shape)); -} - -void BufferArgs::addArg(const CpuSparseMatrix& arg) { - args_.push_back(std::make_shared(arg)); -} - -void BufferArgs::addArg(const GpuSparseMatrix& arg) { - 
args_.push_back(std::make_shared(arg)); -} - } // namespace paddle diff --git a/paddle/function/BufferArgTest.cpp b/paddle/function/BufferArgTest.cpp index 5d669b8137e1a..a9ee3ab079e33 100644 --- a/paddle/function/BufferArgTest.cpp +++ b/paddle/function/BufferArgTest.cpp @@ -14,6 +14,7 @@ limitations under the License. */ #include "BufferArg.h" #include +#include "Function.h" #include "paddle/math/MemoryHandle.h" namespace paddle { @@ -86,43 +87,4 @@ TEST(BufferTest, asArgument) { function(argments); } -template -void FunctionApi(typename Tensor::Matrix& output, - const typename Tensor::Matrix& input); - -template <> -void FunctionApi(CpuMatrix& output, const CpuMatrix& input) { - EXPECT_EQ(output.getHeight(), 100); - EXPECT_EQ(output.getWidth(), 200); -} - -template <> -void FunctionApi(GpuMatrix& output, const GpuMatrix& input) { - EXPECT_EQ(output.getHeight(), 10); - EXPECT_EQ(output.getWidth(), 20); -} - -template -void Function(const BufferArgs& arguments) { - auto input = arguments[0].matrix(); - auto output = arguments[1].matrix(); - FunctionApi(output, input); -} - -TEST(BufferTest, Function) { - CpuMatrix cpuInput = CpuMatrix(100, 200); - CpuMatrix cpuOutput = CpuMatrix(100, 200); - BufferArgs cpuArgments; - cpuArgments.addArg(cpuInput); - cpuArgments.addArg(cpuOutput); - Function(cpuArgments); - - GpuMatrix gpuInput = GpuMatrix(10, 20); - GpuMatrix gpuOutput = GpuMatrix(10, 20); - BufferArgs gpuArgments; - gpuArgments.addArg(gpuInput); - gpuArgments.addArg(gpuOutput); - Function(gpuArgments); -} - } // namespace paddle diff --git a/paddle/function/CMakeLists.txt b/paddle/function/CMakeLists.txt index 37c011549eca9..31c395c8484a3 100644 --- a/paddle/function/CMakeLists.txt +++ b/paddle/function/CMakeLists.txt @@ -21,6 +21,7 @@ if(WITH_TESTING) add_simple_unittest(TensorShapeTest) add_simple_unittest(TensorTypeTest) add_simple_unittest(BufferArgTest) + add_simple_unittest(FunctionTest) # add_unittest(ContextProjectionOpTest # ContextProjectionOpTest.cpp # 
../gserver/tests/TestUtil.cpp) diff --git a/paddle/function/Function.cpp b/paddle/function/Function.cpp index 6f82a8d053bc2..2f56cfc1b5492 100644 --- a/paddle/function/Function.cpp +++ b/paddle/function/Function.cpp @@ -72,6 +72,18 @@ FuncConfig& FuncConfig::set(const std::string& key, bool v) { return *this; } +void BufferArgs::addArg(const Matrix& arg, const TensorShape& shape) { + args_.push_back(std::make_shared(arg, shape)); +} + +void BufferArgs::addArg(const CpuSparseMatrix& arg) { + args_.push_back(std::make_shared(arg)); +} + +void BufferArgs::addArg(const GpuSparseMatrix& arg) { + args_.push_back(std::make_shared(arg)); +} + ClassRegistrar FunctionBase::funcRegistrar_; } // namespace paddle diff --git a/paddle/function/FunctionTest.cpp b/paddle/function/FunctionTest.cpp new file mode 100644 index 0000000000000..7c3d6684cded1 --- /dev/null +++ b/paddle/function/FunctionTest.cpp @@ -0,0 +1,59 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "Function.h" +#include + +namespace paddle { + +template +void FunctionApi(typename Tensor::Matrix& output, + const typename Tensor::Matrix& input); + +template <> +void FunctionApi(CpuMatrix& output, const CpuMatrix& input) { + EXPECT_EQ(output.getHeight(), 100); + EXPECT_EQ(output.getWidth(), 200); +} + +template <> +void FunctionApi(GpuMatrix& output, const GpuMatrix& input) { + EXPECT_EQ(output.getHeight(), 10); + EXPECT_EQ(output.getWidth(), 20); +} + +template +void Function(const BufferArgs& arguments) { + auto input = arguments[0].matrix(); + auto output = arguments[1].matrix(); + FunctionApi(output, input); +} + +TEST(Function, BufferArgs) { + CpuMatrix cpuInput = CpuMatrix(100, 200); + CpuMatrix cpuOutput = CpuMatrix(100, 200); + BufferArgs cpuArgments; + cpuArgments.addArg(cpuInput); + cpuArgments.addArg(cpuOutput); + Function(cpuArgments); + + GpuMatrix gpuInput = GpuMatrix(10, 20); + GpuMatrix gpuOutput = GpuMatrix(10, 20); + BufferArgs gpuArgments; + gpuArgments.addArg(gpuInput); + gpuArgments.addArg(gpuOutput); + Function(gpuArgments); +} + +} // namespace paddle From d35ef9de10b3b97f63fa0156a8c7d36e7e89c8b8 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 9 Jan 2017 11:47:18 +0800 Subject: [PATCH 08/11] follow commit --- paddle/function/BufferArg.h | 20 +++++++++++--------- paddle/function/TensorShape.h | 4 ++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index 52494afed3b85..d787d2814d88b 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -56,7 +56,7 @@ class BufferArg { : buf_(buf), valueType_(valueType) {} BufferArg(const Matrix& matrix) - : buf_((void*)matrix.getData()), + : buf_(reinterpret_cast(matrix.getData())), valueType_(DataType::value), shape_(2) { shape_.setDim(0, matrix.getHeight()); @@ -64,21 +64,23 @@ class BufferArg { } BufferArg(const Matrix& matrix, const TensorShape& shape) - : buf_((void*)matrix.getData()), 
+ : buf_(reinterpret_cast(matrix.getData())), valueType_(DataType::value), shape_(shape) { CHECK_EQ(matrix.getElementCnt(), shape.getElements()); } BufferArg(const Vector& vector) - : buf_((void*)vector.getData()), + : buf_(reinterpret_cast(vector.getData())), valueType_(DataType::value), shape_(1) { shape_.setDim(0, vector.getSize()); } BufferArg(const IVector& vector) - : buf_((void*)vector.getData()), valueType_(VALUE_TYPE_INT32), shape_(1) { + : buf_(reinterpret_cast(vector.getData())), + valueType_(VALUE_TYPE_INT32), + shape_(1) { shape_.setDim(0, vector.getSize()); } @@ -129,7 +131,7 @@ class BufferArg { // sequence start positions in a mini-batch of sequences // shape_.ndims() == 1 // valueType_ = int32 -// if a < b than value_.buf_[a] < value_.buf_[b] +// if a < b then value_.buf_[a] < value_.buf_[b] class SequenceIdArg : public BufferArg { public: SequenceIdArg(void* buf, const TensorShape& shape) @@ -203,13 +205,13 @@ class SparseMatrixArg : public BufferArg { SparseMatrixArg(const CpuSparseMatrix& sparse) : BufferArg(sparse), - row_((void*)sparse.getRows(), VALUE_TYPE_INT32), - col_((void*)sparse.getCols(), VALUE_TYPE_INT32) {} + row_(reinterpret_cast(sparse.getRows()), VALUE_TYPE_INT32), + col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32) {} SparseMatrixArg(const GpuSparseMatrix& sparse) : BufferArg(sparse), - row_((void*)sparse.getRows(), VALUE_TYPE_INT32), - col_((void*)sparse.getCols(), VALUE_TYPE_INT32) {} + row_(reinterpret_cast(sparse.getRows()), VALUE_TYPE_INT32), + col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32) {} ~SparseMatrixArg() {} diff --git a/paddle/function/TensorShape.h b/paddle/function/TensorShape.h index e70484a1afd99..0333fe18316ba 100644 --- a/paddle/function/TensorShape.h +++ b/paddle/function/TensorShape.h @@ -30,14 +30,14 @@ class TensorShape { TensorShape(std::initializer_list dims) { ndims_ = dims.size(); initDims(ndims_); - std::copy(dims.begin(), dims.end(), dims_.begin()); + dims_.assign(dims); 
numElements(); }; TensorShape(const TensorShape& t) : ndims_(t.ndims_), nelements_(t.nelements_) { initDims(ndims_); - std::copy(t.dims_.begin(), t.dims_.end(), dims_.begin()); + dims_.assign(t.dims_.begin(), t.dims_.end()); }; // get the size of specified dimension From 57e252119eee99523a92ecd323532bec355f9144 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 9 Jan 2017 15:21:47 +0800 Subject: [PATCH 09/11] BufferArg add ArgType and Function remove inouts --- paddle/function/BufferArg.h | 45 +- paddle/function/Function.h | 28 +- paddle/function/FunctionTest.cpp | 2 +- paddle/math/Matrix.h~RFbb8b484f.TMP | 1870 --------------------------- 4 files changed, 59 insertions(+), 1886 deletions(-) delete mode 100644 paddle/math/Matrix.h~RFbb8b484f.TMP diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index d787d2814d88b..3d28249f69c2b 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -38,16 +38,40 @@ enum SparseDataType { enum SparseDataFormat { SPARSE_CSR_FORMAT = 0, SPARSE_CSC_FORMAT = 1 }; -/** - * BufferArg used as the argument type for Function. - */ class BufferArg; class SequenceArg; class SparseMatrixArg; typedef std::shared_ptr BufferArgPtr; -// an array of arbitrary dimensions +/** + * \brief BufferArg used as the argument type of Function. + * + * The arguments of the Paddle Function have four Buffer types. + * 1. BufferArg for a dense Buffer of any dimension. + * 2. SequenceIdArg for a Buffer of sequence start positions. + * 3. SequenceArg for a Buffer of sequence data. + * 4. SparseMatrixArg for a Buffer of sparse matrix. + * + * There is an ArgType property for the BufferArg used as Function Output. + * Whether the result of the Function calculation is assigned to the + * output Buffer or added to the output Buffer is determined by the + * argType_ property of the output BufferArg. + */ class BufferArg { +public: + // ArgType is only used by output BufferArg. + // For input argument, argType_ is ignored. 
+ // For output argument, need to set the argType_ of the BufferArg. + enum ArgType { + UNSPECIFIED = 0, + ASSIGN_TO = 1, + ADD_TO = 2, + }; + + void setArgType(ArgType argType) { argType_ = argType; } + + ArgType getArgType() const { return argType_; } + public: BufferArg(void* buf, ValueType valueType, const TensorShape& shape) : buf_(buf), valueType_(valueType), shape_(shape) {} @@ -56,7 +80,8 @@ class BufferArg { : buf_(buf), valueType_(valueType) {} BufferArg(const Matrix& matrix) - : buf_(reinterpret_cast(matrix.getData())), + : buf_( + const_cast(reinterpret_cast(matrix.getData()))), valueType_(DataType::value), shape_(2) { shape_.setDim(0, matrix.getHeight()); @@ -64,21 +89,24 @@ class BufferArg { } BufferArg(const Matrix& matrix, const TensorShape& shape) - : buf_(reinterpret_cast(matrix.getData())), + : buf_( + const_cast(reinterpret_cast(matrix.getData()))), valueType_(DataType::value), shape_(shape) { CHECK_EQ(matrix.getElementCnt(), shape.getElements()); } BufferArg(const Vector& vector) - : buf_(reinterpret_cast(vector.getData())), + : buf_( + const_cast(reinterpret_cast(vector.getData()))), valueType_(DataType::value), shape_(1) { shape_.setDim(0, vector.getSize()); } BufferArg(const IVector& vector) - : buf_(reinterpret_cast(vector.getData())), + : buf_( + const_cast(reinterpret_cast(vector.getData()))), valueType_(VALUE_TYPE_INT32), shape_(1) { shape_.setDim(0, vector.getSize()); @@ -124,6 +152,7 @@ class BufferArg { ValueType valueType_; TensorShape shape_; BufferType bufferType_; + ArgType argType_ = UNSPECIFIED; // leading dimensions. 
The size is dims_.size() // Dims lds_; }; diff --git a/paddle/function/Function.h b/paddle/function/Function.h index 27ebe808aaf44..88d6824aa3939 100644 --- a/paddle/function/Function.h +++ b/paddle/function/Function.h @@ -56,12 +56,18 @@ class BufferArgs { BufferArgs() {} size_t size() const { return args_.size(); } - // add argument into BufferArgss + // add argument into BufferArgs + // Tensor can be Matrix, Vector, IVector. template void addArg(const Tensor& arg) { args_.push_back(std::make_shared(arg)); } + // Add arg into BufferArgs and reshape the arg. + // + // For example, arg represents an image buffer, + // but Matrix can only represent a two-dimensional Tensor. + // So need an extra argument to describe the shape of the image buffer. void addArg(const Matrix& arg, const TensorShape& shape); void addArg(const CpuSparseMatrix& arg); @@ -78,10 +84,20 @@ class BufferArgs { }; /** - * Base class for Function. + * \brief Base class for Function. * The basic Function implementation requires override init and calc interfaces. - * Need to pay attention to the inouts argument. For the input argument - * that will be modified, it needs to be passed through inouts. + * + * Function inputs are readonly, Function outputs have two modes: ASSIGN_TO + * and ADD_TO. + * If output.getArgType() == ASSIGN_TO, this is assign mode, and the calculation + * result of Function assigned to the output BufferArg. + * If output.getArgType() == ADD_TO, this is add mode, and the calculation + * result of Function need added to the output BufferArg. + * + * For example: + * ASSIGN_TO: output = Function(inputs) + * ADD_TO: output += Function(inputs) + * If Function has more than one output, each output can have different modes. 
*/ class FunctionBase { public: @@ -89,9 +105,7 @@ class FunctionBase { virtual void init(const FuncConfig& config) {} - virtual void calc(const BufferArgs& inputs, - const BufferArgs& outputs, - const BufferArgs& inouts) {} + virtual void calc(const BufferArgs& inputs, const BufferArgs& outputs) {} static ClassRegistrar funcRegistrar_; }; diff --git a/paddle/function/FunctionTest.cpp b/paddle/function/FunctionTest.cpp index 7c3d6684cded1..7ce908320a6f6 100644 --- a/paddle/function/FunctionTest.cpp +++ b/paddle/function/FunctionTest.cpp @@ -35,7 +35,7 @@ void FunctionApi(GpuMatrix& output, const GpuMatrix& input) { template void Function(const BufferArgs& arguments) { - auto input = arguments[0].matrix(); + const auto input = arguments[0].matrix(); auto output = arguments[1].matrix(); FunctionApi(output, input); } diff --git a/paddle/math/Matrix.h~RFbb8b484f.TMP b/paddle/math/Matrix.h~RFbb8b484f.TMP deleted file mode 100644 index d89b0f67b3c98..0000000000000 --- a/paddle/math/Matrix.h~RFbb8b484f.TMP +++ /dev/null @@ -1,1870 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include -#include - -#include "paddle/utils/Logging.h" -#include "paddle/utils/ThreadLocal.h" - -#include - -#include "BaseMatrix.h" -#include "MemoryHandle.h" -#include "Vector.h" -#include "paddle/utils/ThreadLocal.h" -#include "paddle/utils/common.h" - -namespace paddle { - -enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 }; - -/** - * @brief matrix sparse_format . - * - * nnz represents nonzero number in sparse matrix. - * - * SPARSE_CSR: row major matrix. length of row is height_ + 1, each element - * represents row start index in Matrix. length of col and value are nnz. - * - * SPARSE_CSC: col major matrix. length of col is width_ + 1, each element - * represents col start index in Matrix. length of col and value are nnz. - * - * @code - * for example: [0, 1, 0, 2, 0; - * 1, 0, 0, 0, 0; - * 0, 0, 0, 2, 5]; - * SPARSE_CSR row [0, 2, 3, 5]; - * col [1, 3, 0, 3, 4]; - * value [1, 2, 1, 2, 5] - * SPARSE_CSC col [0, 1, 2, 2, 4, 5]; - * row [1, 0, 0, 2, 2]; - * value [1, 1, 2, 2, 5] - * @endcode - */ -enum SparseFormat { SPARSE_CSR = 0, SPARSE_CSC = 1 }; - -class Matrix; -class GpuMatrix; -class CpuMatrix; -class CpuSparseMatrix; -class GpuSparseMatrix; -typedef std::shared_ptr MatrixPtr; -typedef std::shared_ptr GpuMatrixPtr; -typedef std::shared_ptr CpuMatrixPtr; -typedef std::shared_ptr GpuSparseMatrixPtr; -typedef std::shared_ptr CpuSparseMatrixPtr; - -/** - * Copy or assignemnt constructor will share the data as opposed to making a - * copy of the original data. To make a copy of the orinal data, use copyFrom() - * instead. 
- */ -class Matrix : public BaseMatrix { -protected: - Matrix(MemoryHandlePtr memHandle, - size_t height, - size_t width, - bool trans, - bool use_gpu); - - Matrix(real* data, size_t height, size_t width, bool trans, bool use_gpu); - - Matrix(real* data, - size_t height, - size_t width, - size_t stride, - bool trans, - bool use_gpu); - - static ThreadLocal tmpMat_; - -public: - size_t elementCnt_; // maximal number of elements which can be held in data_ - MemoryHandlePtr memoryHandle_; - -public: - virtual ~Matrix() {} - - static MatrixPtr create(MemoryHandlePtr memHandle, - size_t height, - size_t width, - bool trans = false); - static MatrixPtr create(size_t height, - size_t width, - bool trans = false, - bool useGpu = false); - static MatrixPtr create(real* data, - size_t height, - size_t width, - bool trans = false, - bool useGpu = false); - static MatrixPtr create(real* data, - size_t height, - size_t width, - size_t stride, - bool trans = false, - bool useGpu = false); - - static MatrixPtr createSparseMatrix(size_t height, - size_t width, - size_t nnz, - SparseValueType valueType = FLOAT_VALUE, - bool trans = false, - bool useGpu = false); - static MatrixPtr createSparseMatrix(size_t height, - size_t width, - size_t nnz, - SparseValueType valueType = FLOAT_VALUE, - SparseFormat foramt = SPARSE_CSR, - bool trans = false, - bool useGpu = false); - - static MatrixPtr createSparseMatrix(real* data, - int* row, - int* col, - size_t height, - size_t width, - size_t nnz, /* used to allocate space */ - SparseValueType valueType, /*value type*/ - SparseFormat format, - bool trans, - bool useGpu); - - static void resizeOrCreateSparseMatrix( - MatrixPtr& matrix, - size_t height, - size_t width, - size_t nnz, - SparseValueType valueType = FLOAT_VALUE, - SparseFormat foramt = SPARSE_CSR, - bool trans = false, - bool useGpu = false); - - static void resizeOrCreate(MatrixPtr& a, - size_t height, - size_t width, - bool trans = false, - bool useGpu = false); - - /** - * 
@brief set the data buffer used to hold the matrix data. - * - * caller should make sure that the size of data is at least - * sizeof(real)*height*width. - */ - void setData(real* data) { - BaseMatrix::setData(data); - memoryHandle_.reset(); - } - - /// the data should be contiguous - void setData(real* data, size_t newHeight, size_t newWidth) { - setData(data); - height_ = newHeight; - width_ = newWidth; - elementCnt_ = newHeight * newWidth; - stride_ = width_; - } - - size_t getWidth() const { return width_; } - size_t getHeight() const { return height_; } - size_t getStride() const { return stride_; } - size_t getElementCnt() const { return elementCnt_; } - virtual real* getData() { return data_; } - virtual const real* getData() const { return data_; } - bool isTransposed() const { return trans_; } - bool isContiguous() const { return stride_ == width_ || height_ == 1; } - - // If sparse matrix, need to dynamic_cast to CpuSparseMatrix/GpuSparseMatrix - // befor call the following functions. - // Declare these functions in the base class just easy to call them. - // And these declarations should be moved to base class of sparse matrix - // if refactor sparse matrix - virtual int* getRows() const { - LOG(FATAL) << "Not implemented"; - return nullptr; //! suppress warning for no return value. - } - - virtual int* getCols() const { - LOG(FATAL) << "Not implemented"; - return nullptr; //! suppress warning for no return value. - } - - virtual SparseFormat getFormat() const { - LOG(FATAL) << "Not implemented"; - return SPARSE_CSR; //! suppress warning for no return value. - } - - virtual SparseValueType getValueType() const { - LOG(FATAL) << "Not implemented"; - return NO_VALUE; //! suppress warning for no return value. - } - - /** - * @brief matrix elment-wise add - * - * Named add3 just because add/add2 has been used in BaseMatrix.cu - * and they are not virtual function. 
- */ - virtual void add3(MatrixPtr b) { LOG(FATAL) << "Not implemented"; } - - MemoryHandlePtr getMemoryHandle() const { return memoryHandle_; } - - virtual void zeroMem() { LOG(FATAL) << "Not implemented"; } - - virtual void resetOne() { LOG(FATAL) << "Not implemented"; } - - void setDiag(real value); - - virtual void copyFrom(const Matrix& src) { LOG(FATAL) << "Not implemented"; } - - virtual void trimFrom(const CpuSparseMatrix& src) { - LOG(FATAL) << "Not implemented"; - } - - // asynchronous copy - virtual void copyFrom(const Matrix& src, hl_stream_t stream) { - LOG(FATAL) << "Not implemented"; - } - - MatrixPtr subMatrix(size_t startRow, - size_t endRow, - size_t startCol, - size_t endCol); - - MatrixPtr subRowMatrix(size_t startRow, size_t endRow) { - return subMatrix(startRow, endRow, 0, getWidth()); - } - - MatrixPtr subColMatrix(size_t startCol, size_t endCol) { - return subMatrix(0, getHeight(), startCol, endCol); - } - - virtual MatrixPtr subMatrix(size_t startRow, size_t numRows) { - CHECK_LE(startRow + numRows, getHeight()); - return Matrix::create(getData() + startRow * getWidth(), - numRows, - getWidth(), - trans_, - useGpu_); - } - virtual MatrixPtr subMatrix(size_t startRow, size_t numRows, MatrixPtr dest) { - CHECK_LE(startRow + numRows, getHeight()); - CHECK_EQ(useGpu_, dest->useGpu_); - dest->setData(this->rowBuf(startRow), numRows, getWidth()); - return dest; - } - - /** - * If this is GpuMatrix, src is assumed to be CPU memory - * - * If this is CpuMatrix, src is assumed to be CPU memory - */ - virtual void copyFrom(const real* src, size_t size) { - LOG(FATAL) << "Not implemented"; - } - - virtual void copyFrom(const real* src, const int64_t* seq) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief convert a int vector to a real matrix. - * - * (1) source and dest are both in CPU. - * - * (2) sizes are exactly match. 
- */ - virtual void copyFrom(const IVector& src) { - LOG(FATAL) << "copy data from int vector only available on CpuMatrix."; - } - - virtual void copyByRowIndex(Matrix& b, const IVector& rowIndex) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief Create a matrix with the same type (GpuMatrix, CpuMatrix, - * NonValueSparseMatrix, etc.) as this. - * - * If height and width is zero, the new matrix will have the same size - * as this, otherwise the new matrix will have the specified size. - * - */ - virtual MatrixPtr clone(size_t height = 0, - size_t width = 0, - bool useGpu = false) { - LOG(FATAL) << "Not implemented"; - return nullptr; - } - - virtual real* getRowBuf(size_t row) { - LOG(FATAL) << "Not implemented"; - return nullptr; - } - - virtual real getElement(size_t x, size_t y) const { - LOG(FATAL) << "Not implemented"; - return 0; - } - - virtual real getSum() { - LOG(FATAL) << "Not implemented"; - return 0; - } - - virtual void accumulateColSum(Matrix& src) { - LOG(FATAL) << "Not implemented"; - } - - virtual real getAbsSum() { - LOG(FATAL) << "Not implemented"; - return 0; - } - - /** - * @note Original data may not be preserved after resize(). - */ - virtual void resize(size_t newHeight, size_t newWidth) = 0; - - /** - * @note This should only be used for sparse matrix. - */ - virtual void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* total item used to allocate space */ - SparseValueType valueType, - SparseFormat format) = 0; - - /** - * @brief This should only be used for sparse matrix. - * - * Currently must be called for each row in order. - * The matrix is not valid until setRow is called for the last row. - */ - virtual void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) = 0; - - virtual MatrixPtr getTranspose() = 0; - - /** - * @brief hard transpose. - * - * allocate matTrans' memory outside, then set memAlloc as false; - * else set as true. 
- */ - virtual void transpose(MatrixPtr matTrans, bool memAlloc) { - LOG(FATAL) << "Not implemented"; - } - - virtual MatrixPtr getInverse() { - LOG(FATAL) << "Not implemented"; - return nullptr; - } - - /** - * @brief inverse. - * - * if allocate matInv's memory outside, then set memAlloc as false; - * else set as true. - */ - virtual void inverse(MatrixPtr matInv, bool memAlloc) { - LOG(FATAL) << "Not implemented"; - } - -public: - /// Only set all variables to 0 or NULL but not free them. - virtual void clear() { - height_ = 0; - width_ = 0; - data_ = NULL; - } - - void reshape(size_t height, size_t width); - - /// add b to each sample of this. - virtual void addBias(Matrix& b, real scale) { - LOG(FATAL) << "Not implemented"; - } - - virtual void addSharedBias(Matrix& b, real scale) { - LOG(FATAL) << "Not implemented"; - } - - void addBias(Matrix& b, real scale, bool sharedBias) { - if (!sharedBias) { - addBias(b, scale); - } else { - addSharedBias(b, scale); - } - } - - /// add each sample from a to this. - virtual void collectBias(Matrix& a, real scale) { - LOG(FATAL) << "Not implemented"; - } - - virtual void collectSharedBias(Matrix& a, real scale) { - LOG(FATAL) << "Not implemented"; - } - - void collectBias(Matrix& a, real scale, bool sharedBias) { - if (!sharedBias) { - collectBias(a, scale); - } else { - collectSharedBias(a, scale); - } - } - - virtual void sequenceAvgForward(Matrix& a, - const IVector& startsPos, - int mode) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = scaleAB*(a*b) + scaleT*this - * @endcode - */ - virtual void mul(const Matrix& a, - const Matrix& b, - real scaleAB, - real scaleT) { - LOG(FATAL) << "Not implemented"; - } - - /// Add a vector (column) b to matrix a, column by column. 
- virtual void addColumnVector(const Matrix& b) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * For j < codeLength: - * this(i, j) += vec(index(i, j), 0) - * where index(i, j) = ((codes(i) + numClasses) >> (j + 1)) - 1 - * @endcode - */ - virtual void addByBitCode(size_t numClasses, - const IVector& codes, - const Matrix& vec) { - (void)numClasses; - (void)codes; - (void)vec; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * vec(index(i, j), 0) += this(i, j) - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void addByBitCodeBackward(size_t numClasses, - const IVector& codes, - Matrix& vec) { - (void)numClasses; - (void)codes; - (void)vec; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * this(i, j) += - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void mulByBitCode(size_t numClasses, - const IVector& codes, - const Matrix& mat, - const Matrix& input) { - (void)numClasses; - (void)codes; - (void)mat; - (void)input; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * mat.row(index(i, j)) += this(i, j) * input.row(i) - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void mulByBitCodeBackwardWeight(size_t numClasses, - const IVector& codes, - Matrix& mat, - const Matrix& input) { - (void)numClasses; - (void)codes; - (void)mat; - (void)input; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength: - * input.row(i) += this(i, j) * mat.row(index(i, j)) - * where index is same as the index for addByBitCode - * @endcode - */ - virtual void mulByBitCodeBackwardError(size_t numClasses, - const IVector& codes, - const Matrix& mat, - Matrix& input) { - (void)numClasses; - (void)codes; - (void)mat; - (void)input; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength - * sum(i, 0) = scaleSum * \sum_j bit(i, j) * this(i, 
j) - * where bit(i, j) = ((codes(i) + numClasses) & 2^j) ? 1 : 0 - * @endcode - */ - virtual void sumByBitCode(size_t numClasses, - IVector& codes, - Matrix& sum, - real scaleSum) { - (void)numClasses; - (void)codes; - (void)sum; - (void)scaleSum; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * For j < codeLength - * this(i, j) -= bit(i, j) - * where bit(i, j) is same as that for sumByBitCode - * @endcode - */ - virtual void subByBitCode(size_t numClasses_, IVector& codes) { - (void)numClasses_; - (void)codes; - LOG(FATAL) << "Not implemeted"; - } - - /** - * add the sum of each row of this to mat - */ - virtual void rowSum(Matrix& sum) { - (void)sum; - LOG(FATAL) << "Not implemeted"; - } - - /** - * set the max of each row of this to mat - */ - virtual void rowMax(Matrix& max) { - (void)max; - LOG(FATAL) << "Not implemeted"; - } - - /** - * set the max of each column of this to mat - */ - virtual void colMax(Matrix& max) { LOG(FATAL) << "not implemented"; } - - /** - * @brief Get the top k elements of each column of this matrix. - * - * The row ids and values of these elements are stored in - * maxIds and max respectively. where k is the size of maxIds. - * And note that the top k elements are not sorted. - */ - virtual void colMax(IVector& maxIds, Matrix& maxVal) { - LOG(FATAL) << "not implemented"; - } - - virtual void maxoutForward(Matrix& a, - IVector& id, - size_t channels, - size_t groups) { - LOG(FATAL) << "not implemented"; - } - - virtual void maxoutBackward(Matrix& a, - IVector& id, - size_t channels, - size_t groups) { - LOG(FATAL) << "not implemented"; - } - - virtual void rowMaxId(IVector& maxIds) { LOG(FATAL) << "Not implemented"; } - - /** - * @brief Get the top k elements of each row of this matrix. - * - * The column ids and values of these elements are stored in - * maxIds and max respectively. where k is the size of maxIds. - * And note that the top k elements are not sorted. 
- */ - virtual void rowMax(IVector& maxIds, Matrix& max) { - LOG(FATAL) << "Not implemented"; - } - - /// normalize each row so that the sum of each row is 1. - virtual void rowNormalizeL1(Matrix& out) { - (void)out; - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * this = a*b - * @endcode - */ - virtual void mul(const Matrix& a, const Matrix& b) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = scaleAB*(this*b) + scaleT*this - * @endcode - */ - virtual void rightMul(Matrix& b, real scaleAB, real scaleT) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = this* b - * @endcode - */ - virtual void rightMul(Matrix& b) { LOG(FATAL) << "Not implemented"; } - - /** - * @code - * this = scaleAB*(a*this) + scaleT*this - * @endcode - */ - virtual void leftMul(Matrix& a, real scaleAB, real scaleT) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this = a*this) - * @endcode - */ - virtual void leftMul(Matrix& a) { LOG(FATAL) << "Not implemented"; } - - /// merge the element for each col. - virtual void colMerge(Matrix& src) { LOG(FATAL) << "Not implemented"; } - - /// copy -log(output[label]) to this->data[i]. - virtual void oneHotCrossEntropy(Matrix& output, IVector& label) { - LOG(FATAL) << "Not implemented"; - } - - /// calculate the error of outputV according to label. - virtual void oneHotCrossEntropyBp(Matrix& outputV, IVector& label) { - LOG(FATAL) << "Not implemented"; - } - - /// copy -log(output[label]) to this->data[i]. - virtual void oneHotCrossEntropyWithSelfNorm(Matrix& output, - IVector& label, - real alpha) { - LOG(FATAL) << "Not implemented"; - } - - /// calculate the error of outputV according to label. 
- virtual void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, - IVector& label, - real alpha) { - LOG(FATAL) << "Not implemented"; - } - - /** - * \f[ - * a[i] = \sum_{j=-(N-1)/2}^{(N-1)/2} b_{i+j} * c_{j} - * \f] - * - * b contains M elements, - * c contains N elements (N is odd), - * b's index arithmetic is computed modulo M, - * c's index arithmetic is computed modulo N. - */ - virtual void circularConv(Matrix& b, Matrix& c) { - LOG(FATAL) << "Not implemented"; - } - - virtual void circularConvDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2) { - LOG(FATAL) << "Not implemented"; - } - - /* output_ij = exp(this_{ij}) / (sum_j exp(this_ij)) */ - virtual void softmax(Matrix& output) { - (void)output; - LOG(FATAL) << "Not implemeted"; - } - virtual void sequenceSoftmax(Matrix& output, const IVector& index) { - (void)output; - LOG(FATAL) << "Not implemeted"; - } - - virtual void softmaxBackward(Matrix& outputV) { - (void)outputV; - LOG(FATAL) << "Not implemeted"; - } - - /* - sum_i = sum_j this_ij * output_ij - this_ij = output_ij* (this_ij - sum_i) - */ - virtual void softmaxDerivative(Matrix& output, Matrix& sftmaxSum) { - LOG(FATAL) << "Not implemented"; - } - - /// calculate the sum of squares diff cost. - virtual void sumOfSquares(Matrix& output, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - /// gradient of sumOfSquares. 
- virtual void sumOfSquaresBp(Matrix& outputV, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - virtual void tanh(Matrix& output) { LOG(FATAL) << "Not implemented"; } - - virtual void tanhDerivative(Matrix& output) { - LOG(FATAL) << "Not implemented"; - } - - virtual void softrelu(Matrix& output) { LOG(FATAL) << "Not implemented"; } - - virtual void softreluDerivative(Matrix& output) { - LOG(FATAL) << "Not implemented"; - } - - virtual void scaledTanh(Matrix& output, real p1, real p2) { - LOG(FATAL) << "Not implemented"; - } - - /** - * cosine similarity, for each row i, - * this[i] = cos(output1[i], output2[i]) - * - * output2 can only have one row, then for each row i, - * this[i] = cos(output1[i], output2[0]) - */ - virtual void cosSim(Matrix& output1, Matrix& output2, real scale = 1.0f) { - LOG(FATAL) << "Not implemented"; - } - - virtual void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale = 1.0f) { - LOG(FATAL) << "Not implemented"; - } - - /// print out the values of elements to os - virtual void print(std::ostream& os) const { - LOG(FATAL) << "Not implemented"; - } - - /** - * print a part of the matrix - * from the (top,left) value to the (height, width) value (not included) - */ - virtual void print(std::ostream& os, size_t height, size_t width) const { - LOG(FATAL) << "Not implemented"; - } - - /// print one row to os - virtual void printOneRow(std::ostream& os, size_t idx) const { - LOG(FATAL) << "Not implemented"; - } - - virtual void check(std::ostream& os, Matrix& refMat, bool printDiff = true) {} - - virtual real getMin() { - LOG(FATAL) << "Not implemented"; - return 0; - } - virtual real getMax() { - LOG(FATAL) << "Not implemented"; - return 0; - } - - virtual void randomizeUniform() { LOG(FATAL) << "Not implemented"; } - - /** - * @brief calulate the error of classification - * - * output[i] = 1 if row i is an error. 
- * - * output[i] = 0 if row i is correct. - */ - virtual void classificationError(Matrix& output, IVector& label) { - LOG(FATAL) << "Not implemented"; - } - - /** - * This function is used to calculate the convolution: - * - * It will expand a feature matrix according to the - * convolution filters - */ - virtual void convExpand(Matrix& feature, - int feaImgHeight, - int feaImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * This function is the reverse implementation of convExpand: - * - * Its function is to restore a expanded-matrix into a feature matrix - */ - virtual void convShrink(Matrix& expandColMat, - int thisImgHeight, - int thisImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW, - real alpha = 1.0f, - real beta = 0.0f) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * Pooling forward operation, pick out the largest element - * in the sizeX of value - */ - virtual void maxPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - /// Pooling backward operation. - virtual void maxPoolBackward(Matrix& image, - size_t imgSizeH, - size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - /// Pooling forward operation, caculate the average of sizeX elements. 
- virtual void avgPoolForward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void avgPoolBackward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * Input: one or more sequences. Each sequence contains some instances. - * - * Output: output size is the number of input sequences (NOT input - * instances). - * - * output[i] is set to max_input[i]. - */ - virtual void maxSequenceForward(Matrix& input, - const IVector& sequence, - IVector& index) { - LOG(FATAL) << "Not implemeted"; - } - - virtual void maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, - IVector& index) { - LOG(FATAL) << "Not implemeted"; - } - - /** - * @code - * this.row[i] += table.row[ids[i]] - * if ids[i] == -1, it will be ignored - * @endcode - */ - virtual void selectRows(Matrix& table, IVector& ids) { - (void)table; - (void)ids; - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * this[i] = table[i, id[i]] - * @endcode - */ - virtual void selectElements(Matrix& table, IVector& ids) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * table.row[ids[i]] += this.row[i] - * if ids[i] == -1, it will be ignored - * @endcode - */ - virtual void addToRows(Matrix& table, IVector& ids) { - (void)table; - (void)ids; - LOG(FATAL) << "Not implemented"; - } - - /** - * @code - * table[i, id[i]] += this[i] - * @endcode - */ - virtual void addElements(Matrix& table, IVector& ids) { - LOG(FATAL) << "Not implemented"; - } - /** - * @brief cross entropy for multi binary labels - * - * @code - * this[i] = 
-sum(label[i][j]*log(output[i][j]) - * + (1-label[i][j])*log(1-output[i][j])) - * @endcode - */ - virtual void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief The gradient of cross entropy for multi binary labels on output - * - * @code - * this[i][j] = -label[i][j]/output[i][j] - * + (1-label[i][j])/(1-output[i][j]) - * @endcode - */ - virtual void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label) { - LOG(FATAL) << "Not implemented"; - } - - /** - * @brief Calculate the classification error for multi binary labels - * - * @code - * this[i] = sum((output[i][j] >= threshold && label[i][j] == 0) - * || (output[i][j] < threshold && label[i][j] == 1)) - * / output->getWidth() - * @endcode - */ - virtual void classificationErrorMulti(Matrix& output, - Matrix& label, - real threshold) { - LOG(FATAL) << "Not implemented"; - } - - virtual void paramReluForward(Matrix& data, Matrix& W) { - LOG(FATAL) << "Not implemented"; - } - virtual void paramReluBackwardW(Matrix& oGrad, Matrix& data) { - LOG(FATAL) << "Not implemented"; - } - virtual void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) { - LOG(FATAL) << "Not implemented"; - } - - virtual void bilinearForward(const Matrix& in, - const size_t inImgH, - const size_t inImgW, - const size_t outImgH, - const size_t outImgW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - LOG(FATAL) << "Not implemented"; - } - virtual void bilinearBackward(const Matrix& out, - const size_t outImgH, - const size_t outImgW, - const size_t inImgH, - const size_t inImgW, - const size_t numChannels, - const real ratioH, - const real ratioW) { - LOG(FATAL) << "Not implemented"; - } - - template - void operator=(const ExpressionType& expr) { - if (useGpu_) { - TensorGpuApply(*this, expr); - } else { - TensorCpuApply(*this, expr); - } - } - - bool isEmpty() const { - return data_ == nullptr; - } - - explicit operator bool() const 
{ - return !isEmpty(); - } -}; - -inline std::ostream& operator<<(std::ostream& os, const Matrix& mat) { - mat.print(os); - return os; -} - -class GpuMatrix : public Matrix { -public: - GpuMatrix(); - - GpuMatrix(size_t height, size_t width, bool trans = false); - GpuMatrix(real* data, size_t height, size_t width, bool trans = false) - : Matrix(data, height, width, trans, true) {} - GpuMatrix(real* data, - size_t height, - size_t width, - size_t stride, - bool trans = false) - : Matrix(data, height, width, stride, trans, true) {} - GpuMatrix(GpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : Matrix(dataHandle, height, width, trans, true) {} - ~GpuMatrix(); - - void zeroMem(); - void resetOne(); - void setDiag(real value); - - void resize(size_t newHeight, size_t newWidth); - void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* used to allocate space */ - SparseValueType valueType, - SparseFormat format) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - - /** - * Copy the data from cpu_memory buffer - */ - void copyFrom(const real* hostSrc, size_t size); - - void copyFrom(const real* hostSrc, const int64_t* seq); - - void copyFrom(const Matrix& src, hl_stream_t stream); - - void copyFrom(const Matrix& src); - - void copyFrom(const IVector& src); - - void copyByRowIndex(Matrix& b, const IVector& rowIndex); - - MatrixPtr clone(size_t height, size_t width, bool useGpu = false); - - real getElement(size_t x, size_t y) const; - - real* getRow(size_t row) { return BaseMatrix::rowBuf(row); } - virtual real* getRowBuf(size_t row) { return getRow(row); } - - real getSum(); - void accumulateColSum(Matrix& src); - real getAbsSum(); - - MatrixPtr getTranspose(); - void transpose(MatrixPtr matTrans, bool memAlloc); - - MatrixPtr getInverse(); - void inverse(MatrixPtr matInv, bool 
memAlloc); - - /// add b to each sample of this. - void addBias(Matrix& b, real scale); - void addSharedBias(Matrix& b, real scale); - - /** - * @code - * add each sample from a to this. - * @endcode - */ - void collectBias(Matrix& a, real scale); - void collectSharedBias(Matrix& a, real scale); - - void sequenceAvgForward(Matrix& a, const IVector& startsPos, int mode); - - /** - * @code - * this.row[i] += table.row[ids[i]] - * @endcode - */ - virtual void selectRows(Matrix& table, IVector& ids); - - /** - * @code - * this[i] = table[i, id[i]] - * @endcode - */ - virtual void selectElements(Matrix& table, IVector& ids); - - /** - * @code - * table.row[ids[i]] += this.row[i] - * @endcode - */ - virtual void addToRows(Matrix& table, IVector& ids); - - void addColumnVector(const Matrix& b); - - /** - * @code - * this = scaleAB*(a*b) + scaleT*this - * @endcode - */ - void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); - - /** - * @code - * this = a*b - * @endcode - */ - void mul(const Matrix& a, const Matrix& b); - - void mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB, real scaleT); - - void mul(const GpuSparseMatrix& a, - const GpuMatrix& b, - real scaleAB, - real scaleT); - - void mul(const GpuMatrix& a, - const GpuSparseMatrix& b, - real scaleAB, - real scaleT); - - /** - * @code - * this = scaleAB*(this*b) + scaleT*this - * @endcode - */ - void rightMul(Matrix& b, real scaleAB, real scaleT); - - /** - * @code - * this = this* b - * @endcode - */ - void rightMul(Matrix& b); - - /** - * @code - * this = scaleAB*(a*this) + scaleT*this - * @endcode - */ - void leftMul(Matrix& a, real scaleAB, real scaleT); - - /** - * @code - * this = a*this - * @endcode - */ - void leftMul(Matrix& a); - - void colMerge(Matrix& src); - void rowSum(Matrix& sum); - void rowMax(Matrix& max); - void rowMax(IVector& maxIds, Matrix& max); - void colMax(Matrix& max); - void colMax(IVector& maxIds, Matrix& max); - void maxoutForward(Matrix& a, IVector& id, size_t 
channels, size_t groups); - void maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t groups); - - void oneHotCrossEntropy(Matrix& output, IVector& label); - void oneHotCrossEntropyBp(Matrix& outputV, IVector& label); - void oneHotCrossEntropyWithSelfNorm(Matrix& output, - IVector& label, - real alpha); - void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, - IVector& label, - real alpha); - - void softmax(Matrix& output); - void sequenceSoftmax(Matrix& output, const IVector& index); - void softmaxBackward(Matrix& outputV); - void softmaxDerivative(Matrix& output, Matrix& sftmaxSum); - - /// calculate the sum of squares diff cost. - void sumOfSquares(Matrix& output, Matrix& label); - - /// gradient of sumOfSquares. - void sumOfSquaresBp(Matrix& outputV, Matrix& label); - void tanh(Matrix& output); - void tanhDerivative(Matrix& output); - void softrelu(Matrix& output); - void softreluDerivative(Matrix& output); - void scaledTanh(Matrix& output, real p1, real p2); - - void cosSim(Matrix& output1, Matrix& output2, real scale); - void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale); - - virtual void print(std::ostream& os) const; - virtual void print(std::ostream& os, size_t height, size_t width) const; - - void paramReluForward(Matrix& data, Matrix& W); - void paramReluBackwardW(Matrix& oGrad, Matrix& data); - void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W); - - void check(std::ostream& os, Matrix& refMat, bool printDiff = true); - void randomizeUniform(); - - void classificationError(Matrix& output, IVector& label); - - void convExpand(Matrix& feature, - int feaImgHeight, - int feaImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW); - - void convShrink(Matrix& expandColMat, - int thisImgHeight, - int thisImgWidth, - int channels, - int blockH, - int blochW, - int 
strideH, - int strideW, - int paddingH, - int paddingWreal, - int outputH, - int outputW, - real alpha = 1.0f, - real beta = 0.0f); - - void maxPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW); - - void maxPoolBackward(Matrix& image, - size_t imgSizeH, - size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW); - - void avgPoolForward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW); - - void avgPoolBackward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW); - - void maxSequenceForward(Matrix& input, - const IVector& sequence, - IVector& index); - - void maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, - IVector& index); - - void bilinearForward(const Matrix& in, - const size_t inImgH, - const size_t inImgW, - const size_t outImgH, - const size_t outImgW, - const size_t numChannels, - const real ratioH, - const real ratioW); - - void bilinearBackward(const Matrix& out, - const size_t outImgH, - const size_t outImgW, - const size_t inImgH, - const size_t inImgW, - const size_t numChannels, - const real ratioH, - const real ratioW); - - void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); - - void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); - - template - void operator=(const ExpressionType& expr) { - TensorGpuApply(*this, 
expr); - } -}; - -class CpuMatrix : public Matrix { -public: - CpuMatrix(size_t height, size_t width, bool trans = false); - CpuMatrix(real* data, size_t height, size_t width, bool trans = false) - : Matrix(data, height, width, trans, false) {} - CpuMatrix(real* data, - size_t height, - size_t width, - size_t stride, - bool trans = false) - : Matrix(data, height, width, stride, trans, false) {} - - CpuMatrix(CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : Matrix(dataHandle, height, width, trans, false) {} - - ~CpuMatrix(); - - void zeroMem(); - void resetOne(); - void setDiag(real value); - - void resize(size_t newHeight, size_t newWidth); - void resize(size_t newHeight, - size_t newWidth, - size_t newNnz, /* used to allocate space */ - SparseValueType valueType, - SparseFormat format) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - void setRow(size_t row, - size_t colNum, - const unsigned int* cols, - const real* values) { - LOG(FATAL) << "Only Support Sparse Matrix"; - } - - real getElement(size_t x, size_t y) const; - real getSum(); - void accumulateColSum(Matrix& src); - real getAbsSum(); - - MatrixPtr getTranspose(); - void transpose(MatrixPtr matTrans, bool memAlloc); - - MatrixPtr getInverse(); - void inverse(MatrixPtr matInv, bool memAlloc); - - void copyFrom(const Matrix& src); - - void copyFrom(const Matrix& src, hl_stream_t stream); - - void copyFrom(const real* cpuSrc, size_t size); - - void copyFrom(const real* cpuSrc, const int64_t* seq); - - void copyFrom(const IVector& src); - - void copyFrom(CpuSparseMatrix& src); - - void copyByRowIndex(Matrix& b, const IVector& rowIndex); - - MatrixPtr clone(size_t height, size_t width, bool useGpu = false); - - void convExpand(Matrix& feature, - int feaImgHeight, - int feaImgWidth, - int channels, - int blcokH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW); - - void convShrink(Matrix& expandFeat, - int 
thisImgHeight, - int thisImgWidth, - int channels, - int blockH, - int blockW, - int strideH, - int strideW, - int paddingH, - int paddingW, - int outputH, - int outputW, - real alpha = 1.0f, - real beta = 0.0f); - - void maxPoolForward(Matrix& inputMat, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW); - - void maxPoolBackward(Matrix& image, - size_t imgSizeH, - size_t imgSizeW, - Matrix& outGrad, - Matrix& outV, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW); - - void avgPoolForward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t channels, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - size_t paddingH, - size_t paddingW); - - void avgPoolBackward(Matrix& input, - size_t imgSizeH, - size_t imgSizeW, - size_t sizeX, - size_t sizeY, - size_t strideH, - size_t strideW, - size_t outputH, - size_t outputW, - real scaleTargets, - real scaleOutput, - size_t paddingH, - size_t paddingW); - - void maxSequenceForward(Matrix& input, - const IVector& sequence, - IVector& index); - - void maxSequenceBackward(Matrix& outputGrad, - const IVector& sequence, - IVector& index); - - real* getRow(size_t row) { return BaseMatrix::rowBuf(row); } - virtual real* getRowBuf(size_t row) { return getRow(row); } - -public: - /// add b to each sample of this. - void addBias(Matrix& b, real scale); - void addSharedBias(Matrix& b, real scale); - - /// add each sample of a to this. 
- void collectBias(Matrix& a, real scale); - void collectSharedBias(Matrix& a, real scale); - - void sequenceAvgForward(Matrix& a, const IVector& startsPos, int mode); - - /** - * @code - * this.row[i] += table.row[ids[i]] - * @endcode - */ - virtual void selectRows(Matrix& table, IVector& ids); - - /** - * @code - * table.row[ids[i]] += this.row[i] - * @endcode - */ - virtual void addToRows(Matrix& table, IVector& ids); - - /** - * @code - * this[i] = table[i, id[i]] - * @endcode - */ - virtual void selectElements(Matrix& table, IVector& ids); - - /** - * @code - * table[i, id[i]] += this[i] - * @endcode - */ - virtual void addElements(Matrix& table, IVector& ids); - - /** - * use abstract getRow() to get row from table. - * - * Define table as template instead of virtual class for performance sake. - * internal used by above two virtual funcs. - */ - template - void selectRowsImp(TableMatType& table, IVector& ids); - template - void addToRowsImp(TableMatType& table, IVector& ids); - - void addColumnVector(const Matrix& b); - - void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); - void mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - - void mul(CpuMatrix* a, CpuSparseMatrix* b, real scaleAB, real scaleT); - - static void mul(CpuMatrix* a, - CpuMatrix* b, - CpuSparseMatrix* c, - real scaleAB, - real scaleT); - - /** - * c = a * b - * - * use abstract getRow() to get row from B,C. - * Define B,C as template instead of virtual class for performance sake. 
- */ - template - static void mul( - CpuSparseMatrix* a, MatBType* b, MatCType* c, real scaleAB, real scaleT); - - virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - - void mul(const Matrix& a, const Matrix& b); - - void rightMul(Matrix& b, real scaleAB, real scaleT); - void rightMul(Matrix& b); - - void leftMul(Matrix& a, real scaleAB, real scaleT); - void leftMul(Matrix& a); - void colMerge(Matrix& src); - void rowSum(Matrix& sum); - void rowMaxId(IVector& maxIds); - void rowMax(Matrix& max); - void rowMax(IVector& maxIds, Matrix& maxVal); - void colMax(Matrix& max); - void colMax(IVector& maxIds, Matrix& maxVal); - void maxoutForward(Matrix& a, IVector& id, size_t channels, size_t groups); - void maxoutBackward(Matrix& a, IVector& id, size_t channels, size_t groups); - void rowNormalizeL1(Matrix& out); - - void oneHotCrossEntropy(Matrix& output, IVector& label); - void oneHotCrossEntropyBp(Matrix& outputV, IVector& label); - void oneHotCrossEntropyWithSelfNorm(Matrix& output, - IVector& label, - real alpha); - void oneHotCrossEntropyWithSelfNormBp(Matrix& outputV, - IVector& label, - real alpha); - - void circularConv(Matrix& b, Matrix& c); - void circularConvDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2); - - void softmax(Matrix& output); - void sequenceSoftmax(Matrix& output, const IVector& index); - void softmaxDerivative(Matrix& output, Matrix& sftmaxSum); - - /// calculate the sum of squares diff cost. - void sumOfSquares(Matrix& output, Matrix& label); - - /// gradient of sumOfSquares. 
- void sumOfSquaresBp(Matrix& outputV, Matrix& label); - - void tanh(Matrix& output); - void tanhDerivative(Matrix& output); - - void softrelu(Matrix& output); - void softreluDerivative(Matrix& output); - void scaledTanh(Matrix& output, real p1, real p2); - - void cosSim(Matrix& output1, Matrix& output2, real scale); - void cosSimDerivative(Matrix& output, - Matrix& prevOut1, - Matrix& prevOut2, - Matrix& prevGrad1, - Matrix& prevGrad2, - real scale); - - void print(std::ostream& os) const; - void print(std::ostream& os, size_t height, size_t width) const; - void printOneRow(std::ostream& os, size_t idx) const; - - void paramReluForward(Matrix& data, Matrix& W); - void paramReluBackwardW(Matrix& oGrad, Matrix& data); - void paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W); - - void check(std::ostream& os, Matrix& refMat, bool printDiff = true); - - real getMin(); - real getMax(); - - void randomizeUniform(); - - void classificationError(Matrix& output, IVector& label); - - void addByBitCode(size_t numClasses, const IVector& codes, const Matrix& vec); - - void addByBitCodeBackward(size_t numClasses, - const IVector& codes, - Matrix& vec); - - void mulByBitCode(size_t numClasses, - const IVector& codes, - const Matrix& mat, - const Matrix& input); - - void mulByBitCodeBackwardWeight(size_t numClasses, - const IVector& codes, - Matrix& mat, - const Matrix& input); - - void mulByBitCodeBackwardError(size_t numClasses, - const IVector& codes, - const Matrix& mat, - Matrix& input); - - void sumByBitCode(size_t numClasses, - IVector& codes, - Matrix& sum, - real scaleSum); - - void subByBitCode(size_t numClasses_, IVector& codes); - - void multiBinaryLabelCrossEntropy(Matrix& output, Matrix& label); - void multiBinaryLabelCrossEntropyBp(Matrix& output, Matrix& label); - void classificationErrorMulti(Matrix& output, Matrix& label, real threshold); - - void bilinearForward(const Matrix& in, - const size_t inImgH, - const size_t inImgW, - const size_t outImgH, - 
const size_t outImgW, - const size_t numChannels, - const real ratioH, - const real ratioW); - - void bilinearBackward(const Matrix& out, - const size_t outImgH, - const size_t outImgW, - const size_t inImgH, - const size_t inImgW, - const size_t numChannels, - const real ratioH, - const real ratioW); - - template - void operator=(const ExpressionType& expr) { - TensorCpuApply(*this, expr); - } -}; - -class SharedCpuMatrix : public CpuMatrix { -public: - /* blockNum is number of partitions of the matrix */ - SharedCpuMatrix(int blockNum, size_t height, size_t width, bool trans = false) - : CpuMatrix(height, width, trans) { - initShared(blockNum); - } - SharedCpuMatrix( - int blockNum, real* data, size_t height, size_t width, bool trans = false) - : CpuMatrix(data, height, width, trans) { - initShared(blockNum); - } - - SharedCpuMatrix(int blockNum, - CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : CpuMatrix(dataHandle, height, width, trans) { - initShared(blockNum); - } - - SharedCpuMatrix(CpuMemHandlePtr dataHandle, - size_t height, - size_t width, - bool trans = false) - : CpuMatrix(dataHandle, height, width, trans) { - initBlock(1); - } - - ~SharedCpuMatrix() {} - -public: - virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - virtual void add(Matrix& b, real p1, real p2); - virtual void add(real p1, real p2); - -private: - using Matrix::mul; - void initShared(int blockNum); - void initBlock(int blockNum); - - int blockNum_; - std::vector> blockLocks_; - ThreadLocal localBuf_; - ThreadLocal> localBufRows_; - ThreadLocal> blockSeq_; -}; - -typedef struct { unsigned int col; } sparse_non_value_t; - -typedef struct { - unsigned int col; - float value; -} sparse_float_value_t; - -} // namespace paddle -#include "ExecViaCpu.h" From df9be2d483cc3073e7b8680c1f687654710d2865 Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Mon, 9 Jan 2017 22:57:29 +0800 Subject: [PATCH 10/11] fix CrossMapNormalFunc and 
ContextProjectionFunc(remove inouts argument) --- paddle/function/BufferArg.h | 78 +++++++++++-------- paddle/function/ContextProjectionOp.cpp | 13 ++-- paddle/function/CrossMapNormalOp.cpp | 19 +++-- paddle/function/Function.cpp | 14 ++-- paddle/function/Function.h | 16 ++-- paddle/gserver/layers/ContextProjection.cpp | 10 +-- paddle/gserver/layers/NormProjectionLayer.cpp | 33 ++++---- 7 files changed, 98 insertions(+), 85 deletions(-) diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index 3d28249f69c2b..6576d18dae99e 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -57,58 +57,67 @@ typedef std::shared_ptr BufferArgPtr; * output Buffer or added to the output Buffer is determined by the * argType_ property of the output BufferArg. */ + +// ArgType is only used by output BufferArg. +// For input argument, argType_ is ignored. +// For output argument, need to set the argType_ of the BufferArg. +enum ArgType { + UNSPECIFIED = 0, + ASSIGN_TO = 1, + ADD_TO = 2, +}; class BufferArg { public: - // ArgType is only used by output BufferArg. - // For input argument, argType_ is ignored. - // For output argument, need to set the argType_ of the BufferArg. 
- enum ArgType { - UNSPECIFIED = 0, - ASSIGN_TO = 1, - ADD_TO = 2, - }; - void setArgType(ArgType argType) { argType_ = argType; } ArgType getArgType() const { return argType_; } public: - BufferArg(void* buf, ValueType valueType, const TensorShape& shape) - : buf_(buf), valueType_(valueType), shape_(shape) {} + BufferArg(void* buf, + ValueType valueType, + const TensorShape& shape, + ArgType argType = UNSPECIFIED) + : buf_(buf), valueType_(valueType), shape_(shape), argType_(argType) {} BufferArg(void* buf, ValueType valueType) : buf_(buf), valueType_(valueType) {} - BufferArg(const Matrix& matrix) + BufferArg(const Matrix& matrix, ArgType argType = UNSPECIFIED) : buf_( const_cast(reinterpret_cast(matrix.getData()))), valueType_(DataType::value), - shape_(2) { + shape_(2), + argType_(argType) { shape_.setDim(0, matrix.getHeight()); shape_.setDim(1, matrix.getWidth()); } - BufferArg(const Matrix& matrix, const TensorShape& shape) + BufferArg(const Matrix& matrix, + const TensorShape& shape, + ArgType argType = UNSPECIFIED) : buf_( const_cast(reinterpret_cast(matrix.getData()))), valueType_(DataType::value), - shape_(shape) { + shape_(shape), + argType_(argType) { CHECK_EQ(matrix.getElementCnt(), shape.getElements()); } - BufferArg(const Vector& vector) + BufferArg(const Vector& vector, ArgType argType = UNSPECIFIED) : buf_( const_cast(reinterpret_cast(vector.getData()))), valueType_(DataType::value), - shape_(1) { + shape_(1), + argType_(argType) { shape_.setDim(0, vector.getSize()); } - BufferArg(const IVector& vector) + BufferArg(const IVector& vector, ArgType argType = UNSPECIFIED) : buf_( const_cast(reinterpret_cast(vector.getData()))), valueType_(VALUE_TYPE_INT32), - shape_(1) { + shape_(1), + argType_(argType) { shape_.setDim(0, vector.getSize()); } @@ -163,8 +172,10 @@ class BufferArg { // if a < b then value_.buf_[a] < value_.buf_[b] class SequenceIdArg : public BufferArg { public: - SequenceIdArg(void* buf, const TensorShape& shape) - : BufferArg(buf, 
VALUE_TYPE_INT32, shape) { + SequenceIdArg(void* buf, + const TensorShape& shape, + ArgType argType = UNSPECIFIED) + : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) { CHECK_EQ(shape_.ndims(), 1); numSeqs_ = shape_[0] - 1; } @@ -187,11 +198,15 @@ class SequenceArg : public BufferArg { SequenceArg(void* buf, ValueType valueType, const TensorShape& shape, - const SequenceIdArg& startPositions) - : BufferArg(buf, valueType, shape), startPositions_(startPositions) {} + const SequenceIdArg& startPositions, + ArgType argType = UNSPECIFIED) + : BufferArg(buf, valueType, shape, argType), + startPositions_(startPositions) {} - SequenceArg(const Matrix& matrix, const IVector& vector) - : BufferArg(matrix), startPositions_(vector) {} + SequenceArg(const Matrix& matrix, + const IVector& vector, + ArgType argType = UNSPECIFIED) + : BufferArg(matrix, argType), startPositions_(vector) {} ~SequenceArg() {} @@ -214,8 +229,9 @@ class SparseMatrixArg : public BufferArg { const BufferArg& col, size_t nnz, SparseDataFormat format, - SparseDataType type) - : BufferArg(buf, valueType, shape), + SparseDataType type, + ArgType argType = UNSPECIFIED) + : BufferArg(buf, valueType, shape, argType), row_(row), col_(col), nnz_(nnz), @@ -232,13 +248,13 @@ class SparseMatrixArg : public BufferArg { } } - SparseMatrixArg(const CpuSparseMatrix& sparse) - : BufferArg(sparse), + SparseMatrixArg(const CpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED) + : BufferArg(sparse, argType), row_(reinterpret_cast(sparse.getRows()), VALUE_TYPE_INT32), col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32) {} - SparseMatrixArg(const GpuSparseMatrix& sparse) - : BufferArg(sparse), + SparseMatrixArg(const GpuSparseMatrix& sparse, ArgType argType = UNSPECIFIED) + : BufferArg(sparse, argType), row_(reinterpret_cast(sparse.getRows()), VALUE_TYPE_INT32), col_(reinterpret_cast(sparse.getCols()), VALUE_TYPE_INT32) {} diff --git a/paddle/function/ContextProjectionOp.cpp 
b/paddle/function/ContextProjectionOp.cpp index 1a483c47953b1..b50098c52123a 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -84,12 +84,9 @@ class ContextProjectionForwardFunc : public FunctionBase { begin_pad_ = config.get("begin_pad"); } - void calc(const BufferArgs& inputs, - const BufferArgs& outputs, - const BufferArgs& inouts) override { + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(3, inputs.size()); CHECK_EQ(1, outputs.size()); - CHECK_EQ(0, inouts.size()); CHECK(outputs[0].data() && inputs[0].data() && inputs[2].data()); CHECK_EQ(outputs[0].shape().ndims(), 2); @@ -103,6 +100,7 @@ class ContextProjectionForwardFunc : public FunctionBase { /// input and output has the same batch_size CHECK_EQ(inputs[0].shape()[0], outputs[0].shape()[0]); + CHECK_EQ(outputs[0].getArgType(), ADD_TO); auto out_mat = outputs[0].matrix(); auto in_mat = inputs[0].matrix(); auto w_mat = !inputs[1].data() @@ -194,12 +192,9 @@ class ContextProjectionBackwardFunc : public FunctionBase { total_pad_ = config.get("total_pad"); } - void calc(const BufferArgs& inputs, - const BufferArgs& outputs, - const BufferArgs& inouts) override { + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(3, inputs.size()); CHECK_EQ(1, outputs.size()); - CHECK_EQ(0, inouts.size()); CHECK(outputs[0].data() && inputs[2].data()); CHECK_EQ(outputs[0].shape().ndims(), 2); @@ -214,6 +209,8 @@ class ContextProjectionBackwardFunc : public FunctionBase { /// dim of output = dim of input * context_length CHECK_EQ(outputs[0].shape()[1], inputs[0].shape()[1] * context_length_); + CHECK_EQ(outputs[0].getArgType(), ADD_TO); + auto out_grad_mat = outputs[0].matrix(); auto in_grad_mat = !inputs[0].data() ? 
typename Tensor::Matrix(nullptr, 0, 0) diff --git a/paddle/function/CrossMapNormalOp.cpp b/paddle/function/CrossMapNormalOp.cpp index ec27db9c21296..23ee357a53d0d 100644 --- a/paddle/function/CrossMapNormalOp.cpp +++ b/paddle/function/CrossMapNormalOp.cpp @@ -112,6 +112,8 @@ void CrossMapNormalGrad(real* inputsGrad, } /** + * \brief {o_0, o_1} = calc(i_0) + * * \param inputs[0] input value. * \param outputs[0] output value. * \param outputs[1] denoms. @@ -125,17 +127,16 @@ class CrossMapNormalFunc : public FunctionBase { pow_ = config.get("pow"); } - void calc(const BufferArgs& inputs, - const BufferArgs& outputs, - const BufferArgs& inouts) override { + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(1, inputs.size()); CHECK_EQ(2, outputs.size()); - CHECK_EQ(0, inouts.size()); CHECK_EQ(inputs[0].shape().ndims(), 4); CHECK(inputs[0].shape() == outputs[0].shape()); CHECK(inputs[0].shape() == outputs[1].shape()); + CHECK_EQ(outputs[0].getArgType(), ASSIGN_TO); + CHECK_EQ(outputs[1].getArgType(), ASSIGN_TO); size_t samples = inputs[0].shape()[0]; size_t channels = inputs[0].shape()[1]; size_t height = inputs[0].shape()[2]; @@ -160,6 +161,8 @@ class CrossMapNormalFunc : public FunctionBase { }; /** + * \brief {o_0} = calc(i_0, i_1, i_2, i_3) + * * \param inputs[0] input value. * \param inputs[1] output value. * \param inputs[2] output grad. 
@@ -175,12 +178,9 @@ class CrossMapNormalGradFunc : public FunctionBase { pow_ = config.get("pow"); } - void calc(const BufferArgs& inputs, - const BufferArgs& outputs, - const BufferArgs& inouts) override { + void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { CHECK_EQ(4, inputs.size()); CHECK_EQ(1, outputs.size()); - CHECK_EQ(0, inouts.size()); CHECK_EQ(inputs[0].shape().ndims(), 4); CHECK(inputs[0].shape() == inputs[1].shape()); @@ -188,6 +188,9 @@ class CrossMapNormalGradFunc : public FunctionBase { CHECK(inputs[0].shape() == inputs[3].shape()); CHECK(inputs[0].shape() == outputs[0].shape()); + // TODO(hedaoyuan): need support ASSIGN_TO mode. + CHECK_EQ(outputs[0].getArgType(), ADD_TO); + size_t samples = inputs[0].shape()[0]; size_t channels = inputs[0].shape()[1]; size_t height = inputs[0].shape()[2]; diff --git a/paddle/function/Function.cpp b/paddle/function/Function.cpp index 2f56cfc1b5492..46af4e946258a 100644 --- a/paddle/function/Function.cpp +++ b/paddle/function/Function.cpp @@ -72,16 +72,18 @@ FuncConfig& FuncConfig::set(const std::string& key, bool v) { return *this; } -void BufferArgs::addArg(const Matrix& arg, const TensorShape& shape) { - args_.push_back(std::make_shared(arg, shape)); +void BufferArgs::addArg(const Matrix& arg, + const TensorShape& shape, + ArgType argType) { + args_.push_back(std::make_shared(arg, shape, argType)); } -void BufferArgs::addArg(const CpuSparseMatrix& arg) { - args_.push_back(std::make_shared(arg)); +void BufferArgs::addArg(const CpuSparseMatrix& arg, ArgType argType) { + args_.push_back(std::make_shared(arg, argType)); } -void BufferArgs::addArg(const GpuSparseMatrix& arg) { - args_.push_back(std::make_shared(arg)); +void BufferArgs::addArg(const GpuSparseMatrix& arg, ArgType argType) { + args_.push_back(std::make_shared(arg, argType)); } ClassRegistrar FunctionBase::funcRegistrar_; diff --git a/paddle/function/Function.h b/paddle/function/Function.h index 88d6824aa3939..249f8f9cfad58 100644 
--- a/paddle/function/Function.h +++ b/paddle/function/Function.h @@ -49,7 +49,7 @@ class FuncConfig { /** * Argument type for Function::calc(). * A BufferArgs contains a set of BufferArg, - * because Function can have multiple inputs, outputs and inouts. + * because Function can have multiple inputs and outputs. */ class BufferArgs { public: @@ -58,9 +58,11 @@ class BufferArgs { // add argument into BufferArgs // Tensor can be Matrix, Vector, IVector. + // For inputs, do not need argType. + // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO. template - void addArg(const Tensor& arg) { - args_.push_back(std::make_shared(arg)); + void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) { + args_.push_back(std::make_shared(arg, argType)); } // Add arg into BufferArgs and reshape the arg. @@ -68,10 +70,12 @@ class BufferArgs { // For example, arg represents an image buffer, // but Matrix can only represent a two-dimensional Tensor. // So need an extra argument to describe the shape of the image buffer. - void addArg(const Matrix& arg, const TensorShape& shape); + void addArg(const Matrix& arg, + const TensorShape& shape, + ArgType argType = UNSPECIFIED); - void addArg(const CpuSparseMatrix& arg); - void addArg(const GpuSparseMatrix& arg); + void addArg(const CpuSparseMatrix& arg, ArgType argType = UNSPECIFIED); + void addArg(const GpuSparseMatrix& arg, ArgType argType = UNSPECIFIED); // get argument const BufferArg& operator[](size_t num) const { diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/gserver/layers/ContextProjection.cpp index 26783a42cac42..04d06cf33fed1 100644 --- a/paddle/gserver/layers/ContextProjection.cpp +++ b/paddle/gserver/layers/ContextProjection.cpp @@ -122,14 +122,13 @@ void ContextProjection::forward() { BufferArgs inputs; BufferArgs outputs; - BufferArgs inouts; inputs.addArg(*in_->value); inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, w_ptr ? 
w_ptr->getHeight() : 0, input_dim)); inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_)); - outputs.addArg(*out_->value); - forward_[0]->calc(inputs, outputs, inouts); + outputs.addArg(*out_->value, ADD_TO); + forward_[0]->calc(inputs, outputs); if (state_ && config_.context_start() < 0) { CHECK_EQ(1, in_->getNumSequences()); @@ -166,15 +165,14 @@ void ContextProjection::backward(const UpdateCallback& callback) { BufferArgs inputs; BufferArgs outputs; - BufferArgs inouts; inputs.addArg(CpuMatrix( in_->grad ? in_->grad->getData() : nullptr, batch_size, input_dim)); inputs.addArg(CpuMatrix(w_ptr ? w_ptr->getData() : nullptr, w_ptr ? w_ptr->getHeight() : 0, input_dim)); inputs.addArg(*in_->sequenceStartPositions->getVector(useGpu_)); - outputs.addArg(*out_->grad); - backward_[0]->calc(inputs, outputs, inouts); + outputs.addArg(*out_->grad, ADD_TO); + backward_[0]->calc(inputs, outputs); if (config_.trainable_padding()) { weight_->getParameterPtr()->incUpdate(callback); diff --git a/paddle/gserver/layers/NormProjectionLayer.cpp b/paddle/gserver/layers/NormProjectionLayer.cpp index 573de152fd0d5..4331009de7e98 100644 --- a/paddle/gserver/layers/NormProjectionLayer.cpp +++ b/paddle/gserver/layers/NormProjectionLayer.cpp @@ -59,7 +59,6 @@ bool CMRProjectionNormLayer::init(const LayerMap& layerMap, void CMRProjectionNormLayer::forward(PassType passType) { Layer::forward(passType); - /* malloc memory for the output_ if necessary */ /* note: one sample correspond to one row */ MatrixPtr input = inputLayers_[0]->getOutputValue(); @@ -67,42 +66,36 @@ void CMRProjectionNormLayer::forward(PassType passType) { int size = getSize(); resetOutput(batchSize, size); - MatrixPtr outV = getOutputValue(); - Matrix::resizeOrCreate(denoms_, batchSize, size, /* trans */ false, useGpu_); shape_ = TensorShape({batchSize, channels_, imgSizeH_, imgSizeW_}); + // prepare forward arguments BufferArgs inputs; BufferArgs outputs; - BufferArgs inouts; - inputs.addArg(*input, shape_); - 
outputs.addArg(*outV, shape_); - outputs.addArg(*denoms_, shape_); + inputs.addArg(*getInputValue(0), shape_); + outputs.addArg(*getOutputValue(), shape_, ASSIGN_TO); + outputs.addArg(*denoms_, shape_, ASSIGN_TO); - forward_[0]->calc(inputs, outputs, inouts); + forward_[0]->calc(inputs, outputs); } void CMRProjectionNormLayer::backward(const UpdateCallback& callback) { (void)callback; - if (NULL == inputLayers_[0]->getOutputGrad()) { + if (NULL == getInputGrad(0)) { return; } - /* Do derivation */ - MatrixPtr preOutGrad = inputLayers_[0]->getOutputGrad(); - MatrixPtr localGrad = getOutputGrad(); - MatrixPtr localOutV = getOutputValue(); - MatrixPtr preOutV = inputLayers_[0]->getOutputValue(); + // prepare backward arguments BufferArgs inputs; BufferArgs outputs; - BufferArgs inouts; - inputs.addArg(*preOutV, shape_); - inputs.addArg(*localOutV, shape_); - inputs.addArg(*localGrad, shape_); + inputs.addArg(*getInputValue(0), shape_); + inputs.addArg(*getOutputValue(), shape_); + inputs.addArg(*getOutputGrad(), shape_); inputs.addArg(*denoms_, shape_); - outputs.addArg(*preOutGrad, shape_); - backward_[0]->calc(inputs, outputs, inouts); + outputs.addArg(*getInputGrad(0), shape_, ADD_TO); + + backward_[0]->calc(inputs, outputs); } } // namespace paddle From ae4400beda6ce14e78d137ff60da4196f7e6c70c Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Tue, 10 Jan 2017 13:17:23 +0800 Subject: [PATCH 11/11] Bug fix for mac os --- paddle/function/BufferArg.h | 12 ++++++------ paddle/function/ContextProjectionOp.cpp | 8 ++++---- paddle/function/CrossMapNormalOp.cpp | 4 ++-- paddle/function/TensorShape.h | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/paddle/function/BufferArg.h b/paddle/function/BufferArg.h index 6576d18dae99e..9649913fa8d9b 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/function/BufferArg.h @@ -126,7 +126,7 @@ class BufferArg { CHECK(buf_); CHECK(valueType_ == DataType::value); // CHECK(deviceType_ == DType); - CHECK_EQ(2, 
shape_.ndims()); + CHECK_EQ((size_t)2, shape_.ndims()); return typename Tensor::Matrix( reinterpret_cast(buf_), shape_[0], shape_[1]); } @@ -136,7 +136,7 @@ class BufferArg { CHECK(buf_); CHECK(valueType_ == DataType::value); // CHECK(deviceType_ == DType); - CHECK_EQ(1, shape_.ndims()); + CHECK_EQ((size_t)1, shape_.ndims()); return typename Tensor::Vector( shape_[0], reinterpret_cast(buf_)); } @@ -176,7 +176,7 @@ class SequenceIdArg : public BufferArg { const TensorShape& shape, ArgType argType = UNSPECIFIED) : BufferArg(buf, VALUE_TYPE_INT32, shape, argType) { - CHECK_EQ(shape_.ndims(), 1); + CHECK_EQ(shape_.ndims(), (size_t)1); numSeqs_ = shape_[0] - 1; } @@ -238,9 +238,9 @@ class SparseMatrixArg : public BufferArg { format_(format), type_(type) { CHECK((valueType == VALUE_TYPE_FLOAT) || (valueType == VALUE_TYPE_DOUBLE)); - CHECK_EQ(shape_.ndims(), 2); - CHECK_EQ(row_.shape().ndims(), 1); - CHECK_EQ(col_.shape().ndims(), 1); + CHECK_EQ(shape_.ndims(), (size_t)2); + CHECK_EQ(row_.shape().ndims(), (size_t)1); + CHECK_EQ(col_.shape().ndims(), (size_t)1); if (format == SPARSE_CSR_FORMAT) { CHECK_EQ(nnz, col.shape()[0]); } else if (format == SPARSE_CSC_FORMAT) { diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/function/ContextProjectionOp.cpp index ca7a11f93683f..cb448562ebb37 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/function/ContextProjectionOp.cpp @@ -85,8 +85,8 @@ class ContextProjectionForwardFunc : public FunctionBase { } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(3, inputs.size()); - CHECK_EQ(1, outputs.size()); + CHECK_EQ((size_t)3, inputs.size()); + CHECK_EQ((size_t)1, outputs.size()); CHECK(outputs[0].data() && inputs[0].data() && inputs[2].data()); CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); @@ -193,8 +193,8 @@ class ContextProjectionBackwardFunc : public FunctionBase { } void calc(const BufferArgs& inputs, const BufferArgs& outputs) override { - CHECK_EQ(3, inputs.size()); 
- CHECK_EQ(1, outputs.size()); + CHECK_EQ((size_t)3, inputs.size()); + CHECK_EQ((size_t)1, outputs.size()); CHECK(outputs[0].data() && inputs[2].data()); CHECK_EQ(outputs[0].shape().ndims(), (size_t)2); diff --git a/paddle/function/CrossMapNormalOp.cpp b/paddle/function/CrossMapNormalOp.cpp index cf989468403d2..92980c503fdaa 100644 --- a/paddle/function/CrossMapNormalOp.cpp +++ b/paddle/function/CrossMapNormalOp.cpp @@ -131,7 +131,7 @@ class CrossMapNormalFunc : public FunctionBase { CHECK_EQ((size_t)1, inputs.size()); CHECK_EQ((size_t)2, outputs.size()); - CHECK_EQ(inputs[0].shape().ndims(), 4); + CHECK_EQ(inputs[0].shape().ndims(), (size_t)4); CHECK(inputs[0].shape() == outputs[0].shape()); CHECK(inputs[0].shape() == outputs[1].shape()); @@ -182,7 +182,7 @@ class CrossMapNormalGradFunc : public FunctionBase { CHECK_EQ((size_t)4, inputs.size()); CHECK_EQ((size_t)1, outputs.size()); - CHECK_EQ(inputs[0].shape().ndims(), 4); + CHECK_EQ(inputs[0].shape().ndims(), (size_t)4); CHECK(inputs[0].shape() == inputs[1].shape()); CHECK(inputs[0].shape() == inputs[2].shape()); CHECK(inputs[0].shape() == inputs[3].shape()); diff --git a/paddle/function/TensorShape.h b/paddle/function/TensorShape.h index 0333fe18316ba..e491e3f1d6b26 100644 --- a/paddle/function/TensorShape.h +++ b/paddle/function/TensorShape.h @@ -42,14 +42,14 @@ class TensorShape { // get the size of specified dimension size_t operator[](size_t dim) const { - CHECK_GE(dim, 0); + CHECK_GE(dim, (size_t)0); CHECK_LT(dim, ndims_); return dims_[dim]; } // set the size of specified dimension void setDim(size_t dim, size_t size) { - CHECK_GE(dim, 0); + CHECK_GE(dim, (size_t)0); CHECK_LT(dim, ndims_); dims_[dim] = size; numElements();