Skip to content

Commit

Permalink
Merge pull request #3501 from qingqing01/cross_entropy
Browse files Browse the repository at this point in the history
Implement GPU kernel for cross entropy operator.
  • Loading branch information
qingqing01 authored Aug 22, 2017
2 parents ce723af + a8863a8 commit f931140
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 23 deletions.
2 changes: 1 addition & 1 deletion paddle/framework/pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ limitations under the License. */
namespace py = pybind11;

USE_OP(add_two);
USE_CPU_ONLY_OP(onehot_cross_entropy);
USE_OP(onehot_cross_entropy);
USE_OP(sgd);
USE_OP(mul);
USE_OP(mean);
Expand Down
15 changes: 6 additions & 9 deletions paddle/operators/cross_entropy_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,10 @@ class OnehotCrossEntropyGradientOp : public framework::OperatorWithKernel {

protected:
void InferShape(const framework::InferShapeContext &ctx) const override {
auto X_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto dX = ctx.Output<Tensor>(framework::GradVarName("X"));
auto X = ctx.Input<Tensor>("X");

// TODO(superjom) add enforce here after helper functions ready
X_grad->Resize(X->dims());
dX->Resize(X->dims());
}
};

Expand All @@ -70,9 +69,7 @@ namespace ops = paddle::operators;
REGISTER_OP(onehot_cross_entropy, ops::OnehotCrossEntropyOp,
ops::OnehotCrossEntropyOpMaker, onehot_cross_entropy_grad,
ops::OnehotCrossEntropyGradientOp);
REGISTER_OP_CPU_KERNEL(
onehot_cross_entropy,
ops::OnehotCrossEntropyOpKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(
onehot_cross_entropy_grad,
ops::OnehotCrossEntropyGradientOpKernel<paddle::platform::CPUPlace, float>);
REGISTER_OP_CPU_KERNEL(onehot_cross_entropy,
ops::OnehotCrossEntropyOpKernel<float>);
REGISTER_OP_CPU_KERNEL(onehot_cross_entropy_grad,
ops::OnehotCrossEntropyGradientOpKernel<float>);
122 changes: 117 additions & 5 deletions paddle/operators/cross_entropy_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,122 @@
See the License for the specific language governing permissions and
limitations under the License. */

#define EIGEN_USE_GPU
#include "paddle/operators/cross_entropy_op.h"
#include "paddle/framework/op_registry.h"
#include "paddle/platform/assert.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename T>
__host__ __device__ T clipping_log(const T x) {
PADDLE_ASSERT(std::is_floating_point<T>::value);
const T kApproInf = 1e20;
T v = log(x);
if (v == INFINITY) {
return kApproInf;
}
if (v == -INFINITY) {
return -kApproInf;
}
return v;
}

template <typename T>
__global__ void CrossEntropyKernel(T* Y, const T* X, const int* label,
const int N, const int D) {
// TOOD(qingqing) define CUDA_1D_KERNEL_LOOP macro in a common file.
// CUDA_1D_KERNEL_LOOP(i, N) {
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
i += blockDim.x * gridDim.x) {
PADDLE_ASSERT(label[i] >= 0 && label[i] < D);
Y[i] = -clipping_log(X[i * D + label[i]]);
}
}

// TODO(qingqing): make zero setting an common function.
template <typename T>
__global__ void zero(T* X, const int N) {
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
i += blockDim.x * gridDim.x) {
X[i] = 0.0;
}
}

template <typename T>
__global__ void CrossEntropyGradientKernel(T* dX, const T* dY, const T* X,
const int* label, const int N,
const int D) {
// TOOD(qingqing) define CUDA_1D_KERNEL_LOOP macro in a common file.
// CUDA_1D_KERNEL_LOOP(i, N) {
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
i += blockDim.x * gridDim.x) {
int idx = i * D + label[i];
dX[idx] = -dY[i] / X[idx];
}
}

template <typename T>
class OnehotCrossEntropyOpCUDAKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"It must use GPUPlace.");

auto X = ctx.Input<Tensor>("X");
const T* Xdata = X->data<T>();
const int* label_data = ctx.Input<Tensor>("label")->data<int>();
auto Y = ctx.Output<Tensor>("Y");
Y->mutable_data<T>(ctx.GetPlace());
T* Ydata = Y->data<T>();

int N = X->dims()[0];
int D = X->dims()[1];
int block = 512;
int grid = (N + block - 1) / block;
// TODO(qingqing) launch kernel on specified stream
// base on ExecutionContext.
CrossEntropyKernel<T><<<grid, block>>>(Ydata, Xdata, label_data, N, D);
}
};

template <typename T>
class OnehotCrossEntropyGradientOpCUDAKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"It must use GPUPlace.");

auto X = ctx.Input<Tensor>("X");
auto dX = ctx.Output<Tensor>(framework::GradVarName("X"));
auto dY = ctx.Input<Tensor>(framework::GradVarName("Y"));
auto label = ctx.Input<Tensor>("label");

auto* dXdata = dX->template mutable_data<T>(ctx.GetPlace());
auto* dYdata = dY->template data<T>();
auto* Xdata = X->template data<T>();
auto* label_data = label->data<int>();

int N = X->dims()[0];
int D = X->dims()[1];
int block = 512;
int grid = (N * D + block - 1) / block;
zero<T><<<grid, block>>>(dXdata, N * D);

grid = (N + block - 1) / block;
// TODO(qingqing): launch kernel on specified stream
// base on ExecutionContext.
CrossEntropyGradientKernel<T><<<grid, block>>>(dXdata, dYdata, Xdata,
label_data, N, D);
}
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_GPU_KERNEL(
onehot_cross_entropy,
ops::OnehotCrossEntropyOpKernel<paddle::platform::GPUPlace, float>);
REGISTER_OP_GPU_KERNEL(onehot_cross_entropy,
ops::OnehotCrossEntropyOpCUDAKernel<float>);
REGISTER_OP_GPU_KERNEL(onehot_cross_entropy_grad,
ops::OnehotCrossEntropyGradientOpCUDAKernel<float>);
14 changes: 11 additions & 3 deletions paddle/operators/cross_entropy_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace operators {
using Tensor = framework::Tensor;

template <typename T>
T tolerable_value(T x) {
inline T tolerable_value(const T x) {
static_assert(std::is_floating_point<T>::value,
"tolerable_value works only on float, "
"double and double double.");
Expand All @@ -39,10 +39,13 @@ T tolerable_value(T x) {
return x;
}

template <typename Place, typename T>
template <typename T>
class OnehotCrossEntropyOpKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");

auto X = ctx.Input<Tensor>("X");
const T* Xdata = X->data<T>();
const int* label_data = ctx.Input<Tensor>("label")->data<int>();
Expand All @@ -62,10 +65,13 @@ class OnehotCrossEntropyOpKernel : public framework::OpKernel {
}
};

template <typename Place, typename T>
template <typename T>
class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");

auto X = ctx.Input<Tensor>("X");
auto dX = ctx.Output<Tensor>(framework::GradVarName("X"));
auto dY = ctx.Input<Tensor>(framework::GradVarName("Y"));
Expand All @@ -79,6 +85,8 @@ class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel {
const int batch_size = X->dims()[0];
const int class_num = X->dims()[1];

// TODO(qingqing): make zero setting an common function.
memset(dXdata, 0, sizeof(T) * batch_size * class_num);
for (int i = 0; i < batch_size; ++i) {
int index = i * class_num + label_data[i];
dXdata[index] = -tolerable_value(dYdata[i] / Xdata[index]);
Expand Down
3 changes: 2 additions & 1 deletion python/paddle/v2/framework/tests/op_test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ def test_all(self):
actual = numpy.array(scope.find_var(out_name).get_tensor())
expect = self.outputs[out_name]
self.assertTrue(
numpy.allclose(actual, expect),
numpy.allclose(
actual, expect, atol=1e-05),
"output name: " + out_name + "has diff")

obj.test_all = test_all
Expand Down
7 changes: 3 additions & 4 deletions python/paddle/v2/framework/tests/test_cross_entropy_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ class TestCrossEntropy(unittest.TestCase):
__metaclass__ = OpTestMeta

def setUp(self):
# TODO this unit test is not passed
self.type = "onehot_cross_entropy"
batch_size = 100
batch_size = 30
class_num = 10
X = numpy.random.random((batch_size, class_num)).astype("float32")
label = 5 * numpy.ones(batch_size).astype("int32")
Expand All @@ -22,9 +21,9 @@ def setUp(self):


class CrossEntropyGradOpTest(GradientChecker):
def test_softmax_grad(self):
def test_check_grad(self):
op = create_op("onehot_cross_entropy")
batch_size = 100
batch_size = 30
class_num = 10
inputs = {
"X": numpy.random.uniform(
Expand Down

0 comments on commit f931140

Please sign in to comment.