From 3a507b44bdf41f082145e8c028adfb976c8571ac Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 13 Nov 2017 17:55:08 +0800 Subject: [PATCH 1/4] add conv3d_trans_cudnn_op --- paddle/operators/CMakeLists.txt | 33 +++++++++++-------- ...cudnn_op.cc => conv_transpose_cudnn_op.cc} | 11 +++++++ ...cudnn_op.cu => conv_transpose_cudnn_op.cu} | 5 +++ 3 files changed, 36 insertions(+), 13 deletions(-) rename paddle/operators/{conv2d_transpose_cudnn_op.cc => conv_transpose_cudnn_op.cc} (82%) rename paddle/operators/{conv2d_transpose_cudnn_op.cu => conv_transpose_cudnn_op.cu} (97%) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 709f7de2e4309..71740b8b0c638 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -55,6 +55,18 @@ function(op_library TARGET) set(pybind_flag 1) endif() + if ("${TARGET}" STREQUAL "compare_op") + set(pybind_flag 1) + file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(equal);\n") + endif() + + # conv_op contains several operators + if ("${TARGET}" STREQUAL "conv_op") + set(pybind_flag 1) + # It's enough to just adding one operator to pybind + file(APPEND ${pybind_file} "USE_OP(conv2d);\n") + endif() + # pool_op contains several operators if ("${TARGET}" STREQUAL "pool_op") set(pybind_flag 1) @@ -62,9 +74,11 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP(pool2d);\n") endif() - if ("${TARGET}" STREQUAL "compare_op") + # pool_cudnn_op contains several operators + if ("${TARGET}" STREQUAL "pool_cudnn_op") set(pybind_flag 1) - file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(equal);\n") + # It's enough to just adding one operator to pybind + file(APPEND ${pybind_file} "USE_OP(pool2d_cudnn);\n") endif() # pool_with_index_op contains several operators @@ -74,25 +88,18 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP(max_pool2d_with_index);\n") endif() - # conv_op contains several operators - if ("${TARGET}" STREQUAL "conv_op") - set(pybind_flag 1) - # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(conv2d);\n") - endif() - # conv_transpose_op contains several operators if ("${TARGET}" STREQUAL "conv_transpose_op") set(pybind_flag 1) # It's enough to just adding one operator to pybind file(APPEND ${pybind_file} "USE_OP(conv2d_transpose);\n") endif() - - # pool_cudnn_op contains several operators - if ("${TARGET}" STREQUAL "pool_cudnn_op") + + # conv_transpose_cudnn_op contains two operators + if ("${TARGET}" STREQUAL "conv_transpose_cudnn_op") set(pybind_flag 1) # It's enough to just adding one operator to pybind - file(APPEND ${pybind_file} "USE_OP(pool2d_cudnn);\n") + file(APPEND ${pybind_file} "USE_OP(conv2d_transpose_cudnn);\n") endif() # save_restore_op contains several operators diff --git a/paddle/operators/conv2d_transpose_cudnn_op.cc b/paddle/operators/conv_transpose_cudnn_op.cc similarity index 82% rename from paddle/operators/conv2d_transpose_cudnn_op.cc rename to paddle/operators/conv_transpose_cudnn_op.cc index fce1357ce5af5..7ec3319cd0cd4 100644 --- a/paddle/operators/conv2d_transpose_cudnn_op.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cc @@ -48,3 +48,14 @@ REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL( conv2d_transpose_cudnn_grad, ops::GemmConvTransposeGradKernel); + +REGISTER_OP(conv3d_transpose_cudnn, ops::ConvTransposeOp, + ops::CudnnConv3DTransposeOpMaker, conv3d_transpose_cudnn_grad, + ops::ConvTransposeOpGrad); + +REGISTER_OP_CPU_KERNEL( + conv3d_transpose_cudnn, + ops::GemmConvTransposeKernel); +REGISTER_OP_CPU_KERNEL( + conv3d_transpose_cudnn_grad, + ops::GemmConvTransposeGradKernel); diff --git a/paddle/operators/conv2d_transpose_cudnn_op.cu b/paddle/operators/conv_transpose_cudnn_op.cu similarity index 97% rename from paddle/operators/conv2d_transpose_cudnn_op.cu rename to paddle/operators/conv_transpose_cudnn_op.cu index 694526ec01214..cd31896f2c06e 100644 --- a/paddle/operators/conv2d_transpose_cudnn_op.cu +++ b/paddle/operators/conv_transpose_cudnn_op.cu @@ -237,3 +237,8 @@ REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn, ops::CudnnConvTransposeOpKernel); REGISTER_OP_GPU_KERNEL(conv2d_transpose_cudnn_grad, ops::CudnnConvTransposeGradOpKernel); + +REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn, + ops::CudnnConvTransposeOpKernel); +REGISTER_OP_GPU_KERNEL(conv3d_transpose_cudnn_grad, + ops::CudnnConvTransposeGradOpKernel); From 6fb4bb8efea3c21ef33b8568069c1cbc2a38a381 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 13 Nov 2017 17:58:44 +0800 Subject: [PATCH 2/4] add conv3d_trans_cudnn_op unit test --- paddle/operators/conv_transpose_cudnn_op.cc | 19 ++++++++++++++++++- .../tests/test_conv3d_transpose_op.py | 6 ++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/paddle/operators/conv_transpose_cudnn_op.cc b/paddle/operators/conv_transpose_cudnn_op.cc index 7ec3319cd0cd4..dbd1bc3c3bc2d 100644 --- a/paddle/operators/conv_transpose_cudnn_op.cc +++ b/paddle/operators/conv_transpose_cudnn_op.cc @@ -23,7 +23,24 @@ class CudnnConv2DTransposeOpMaker : public Conv2DTransposeOpMaker { framework::OpAttrChecker* op_checker) : Conv2DTransposeOpMaker(proto, op_checker) { AddAttr>("dilations", "dilations of convolution operator.") - .SetDefault(std::vector{1, 1}); + .SetDefault({1, 1}); + AddAttr("workspace_size_MB", + "workspace size for cudnn, in MB, " + "workspace is a section of GPU memory which will be " + "allocated/freed each time the operator runs, larger " + "workspace size can increase performance but also requires " + "better hardward. This size should be carefully setted.") + .SetDefault(4096); + } +}; + +class CudnnConv3DTransposeOpMaker : public Conv3DTransposeOpMaker { + public: + CudnnConv3DTransposeOpMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : Conv3DTransposeOpMaker(proto, op_checker) { + AddAttr>("dilations", "dilations of convolution operator.") + .SetDefault({1, 1, 1}); AddAttr("workspace_size_MB", "workspace size for cudnn, in MB, " "workspace is a section of GPU memory which will be " diff --git a/python/paddle/v2/framework/tests/test_conv3d_transpose_op.py b/python/paddle/v2/framework/tests/test_conv3d_transpose_op.py index 132fe7931438a..73ee260c5abe4 100644 --- a/python/paddle/v2/framework/tests/test_conv3d_transpose_op.py +++ b/python/paddle/v2/framework/tests/test_conv3d_transpose_op.py @@ -93,5 +93,11 @@ def init_op_type(self): self.op_type = "conv3d_transpose" +# ------------ test_cudnn ------------ +class TestCudnn(TestConv3dTransposeOp): + def init_op_type(self): + self.op_type = "conv3d_transpose_cudnn" + + if __name__ == '__main__': unittest.main() From 74912c7d4ed83c78c4c3076d306fae3923c5432f Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Wed, 15 Nov 2017 15:37:40 +0800 Subject: [PATCH 3/4] fix data layout --- paddle/operators/conv_transpose_cudnn_op.cu | 20 +++++++++++++------- paddle/platform/cudnn_helper.h | 13 +++++++------ 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/paddle/operators/conv_transpose_cudnn_op.cu b/paddle/operators/conv_transpose_cudnn_op.cu index cd31896f2c06e..00e0ec255dcb1 100644 --- a/paddle/operators/conv_transpose_cudnn_op.cu +++ b/paddle/operators/conv_transpose_cudnn_op.cu @@ -54,15 +54,21 @@ class CudnnConvTransposeOpKernel : public framework::OpKernel { ScopedTensorDescriptor output_desc; ScopedFilterDescriptor filter_desc; ScopedConvolutionDescriptor conv_desc; - DataLayout layout = DataLayout::kNCHW; + DataLayout layout; + + if (strides.size() == 2U) { + layout = DataLayout::kNCHW; + } else { + layout = DataLayout::kNCDHW; + } - // N, M, H, W + // (N, M, H, W) or (N, M, D, H, W) cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims())); - // N, C, O_h, O_w + // (N, C, O_h, O_w) or (N, C, O_d, O_h, O_w) cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor( layout, framework::vectorize2int(output->dims())); - // M, C, K_h, K_w + // (M, C, K_h, K_w) or (M, C, K_d, K_h, K_w) cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor( layout, framework::vectorize2int(filter->dims())); cudnnConvolutionDescriptor_t cudnn_conv_desc = @@ -136,13 +142,13 @@ class CudnnConvTransposeGradOpKernel : public framework::OpKernel { ScopedConvolutionDescriptor conv_desc; DataLayout layout = DataLayout::kNCHW; - // Input: (N, M, H, W) + // Input: (N, M, H, W) or (N, M, D, H, W) cudnnTensorDescriptor_t cudnn_input_desc = input_desc.descriptor( layout, framework::vectorize2int(input->dims())); - // Output: (N, C, O_H, O_W) + // Output: (N, C, O_h, O_w) or (N, C, O_d, O_h, O_w) cudnnTensorDescriptor_t cudnn_output_desc = output_desc.descriptor( layout, framework::vectorize2int(output_grad->dims())); - // Filter (M, C, K_H, K_W) + // Filter (M, C, K_h, K_w) or (M, C, K_d K_h, K_w) cudnnFilterDescriptor_t cudnn_filter_desc = filter_desc.descriptor( layout, framework::vectorize2int(filter->dims())); diff --git a/paddle/platform/cudnn_helper.h b/paddle/platform/cudnn_helper.h index ce3421a3cb840..8d75fceae8bfb 100644 --- a/paddle/platform/cudnn_helper.h +++ b/paddle/platform/cudnn_helper.h @@ -1,11 +1,8 @@ /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 - Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -63,9 +60,10 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) { } \ } while (false) -enum class DataLayout { +enum class DataLayout { // Not use kNHWC, kNCHW, + kNCDHW, kNCHW_VECT_C, }; @@ -107,12 +105,15 @@ class CudnnDataType { } }; -inline cudnnTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) { +inline cudnnTensorFormat_t GetCudnnTensorFormat( + const DataLayout& order) { // Not use switch (order) { case DataLayout::kNHWC: return CUDNN_TENSOR_NHWC; case DataLayout::kNCHW: return CUDNN_TENSOR_NCHW; + case DataLayout::kNCDHW: + return CUDNN_TENSOR_NCHW; // TODO(chengduoZH) : add CUDNN_TENSOR_NCDHW default: PADDLE_THROW("Unknown cudnn equivalent for order"); } @@ -139,7 +140,7 @@ class ScopedTensorDescriptor { strides[i] = dims[i + 1] * strides[i + 1]; } // Update tensor descriptor dims setting if groups > 1 - // FIXME(typhoonzero): Assume using NCHW order + // FIXME(typhoonzero): Assume using NCHW or NCDHW order std::vector dims_with_group(dims.begin(), dims.end()); // copy if (groups > 1) { dims_with_group[1] = dims_with_group[1] / groups; From c359e39b59d76abfb795e5eaf7d36bfec17c2bb9 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Fri, 17 Nov 2017 16:54:32 +0800 Subject: [PATCH 4/4] add double type kernel --- paddle/operators/conv_op.cc | 12 ++++++++---- paddle/operators/conv_op.cu.cc | 12 ++++++++---- paddle/operators/conv_transpose_op.cc | 12 ++++++++---- paddle/operators/conv_transpose_op.cu.cc | 12 ++++++++---- 4 files changed, 32 insertions(+), 16 deletions(-) diff --git a/paddle/operators/conv_op.cc b/paddle/operators/conv_op.cc index 687d741cb22a0..7a36a9b21aa6a 100644 --- a/paddle/operators/conv_op.cc +++ b/paddle/operators/conv_op.cc @@ -225,11 +225,15 @@ REGISTER_OP(conv3d, ops::ConvOp, ops::Conv3DOpMaker, conv3d_grad, ops::ConvOpGrad); REGISTER_OP_CPU_KERNEL(conv2d, - ops::GemmConvKernel); + ops::GemmConvKernel, + ops::GemmConvKernel); REGISTER_OP_CPU_KERNEL( - conv2d_grad, ops::GemmConvGradKernel); + conv2d_grad, ops::GemmConvGradKernel, + ops::GemmConvGradKernel); REGISTER_OP_CPU_KERNEL(conv3d, - ops::GemmConvKernel); + ops::GemmConvKernel, + ops::GemmConvKernel); REGISTER_OP_CPU_KERNEL( - conv3d_grad, ops::GemmConvGradKernel); + conv3d_grad, ops::GemmConvGradKernel, + ops::GemmConvGradKernel); diff --git a/paddle/operators/conv_op.cu.cc b/paddle/operators/conv_op.cu.cc index 8e6f9da455b72..546451234a1ed 100644 --- a/paddle/operators/conv_op.cu.cc +++ b/paddle/operators/conv_op.cu.cc @@ -17,11 +17,15 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL(conv2d, - ops::GemmConvKernel); + ops::GemmConvKernel, + ops::GemmConvKernel); REGISTER_OP_GPU_KERNEL( - conv2d_grad, ops::GemmConvGradKernel); + conv2d_grad, ops::GemmConvGradKernel, + ops::GemmConvGradKernel); REGISTER_OP_GPU_KERNEL(conv3d, - ops::GemmConvKernel); + ops::GemmConvKernel, + ops::GemmConvKernel); REGISTER_OP_GPU_KERNEL( - conv3d_grad, ops::GemmConvGradKernel); + conv3d_grad, ops::GemmConvGradKernel, + ops::GemmConvGradKernel); diff --git a/paddle/operators/conv_transpose_op.cc b/paddle/operators/conv_transpose_op.cc index 310e3f5c937bd..3e55ef036a7fb 100644 --- a/paddle/operators/conv_transpose_op.cc +++ b/paddle/operators/conv_transpose_op.cc @@ -185,17 +185,21 @@ REGISTER_OP(conv2d_transpose, ops::ConvTransposeOp, ops::Conv2DTransposeOpMaker, REGISTER_OP_CPU_KERNEL( conv2d_transpose, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_CPU_KERNEL( conv2d_transpose_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); REGISTER_OP(conv3d_transpose, ops::ConvTransposeOp, ops::Conv3DTransposeOpMaker, conv3d_transpose_grad, ops::ConvTransposeOpGrad); REGISTER_OP_CPU_KERNEL( conv3d_transpose, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_CPU_KERNEL( conv3d_transpose_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); diff --git a/paddle/operators/conv_transpose_op.cu.cc b/paddle/operators/conv_transpose_op.cu.cc index 401cddb379ced..4165eb0c7b048 100644 --- a/paddle/operators/conv_transpose_op.cu.cc +++ b/paddle/operators/conv_transpose_op.cu.cc @@ -18,14 +18,18 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( conv2d_transpose, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_GPU_KERNEL( conv2d_transpose_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); REGISTER_OP_GPU_KERNEL( conv3d_transpose, - ops::GemmConvTransposeKernel); + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); REGISTER_OP_GPU_KERNEL( conv3d_transpose_grad, - ops::GemmConvTransposeGradKernel); + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel);