diff --git a/paddle/operators/math/sequence_project.h b/paddle/operators/math/sequence_project.h
index 3d8b5a2f397b5..1d799a0c1c297 100644
--- a/paddle/operators/math/sequence_project.h
+++ b/paddle/operators/math/sequence_project.h
@@ -90,8 +90,8 @@ template <typename Place, typename T>
 class SequenceProjectFunctor {
  public:
   void operator()(const platform::DeviceContext& context,
-                  framework::LoDTensor& in, framework::LoDTensor& padding_data,
-                  framework::LoDTensor& col, bool padding_trainable,
+                  framework::LoDTensor& in, framework::Tensor& padding_data,
+                  framework::Tensor& col, bool padding_trainable,
                   int context_start, int context_length, int context_stride,
                   int up_pad, int down_pad, bool gradient, bool input_grad,
                   bool pad_grad) {
diff --git a/paddle/operators/sequence_conv_op.cc b/paddle/operators/sequence_conv_op.cc
index d286d334a24c6..463bca7a443b5 100644
--- a/paddle/operators/sequence_conv_op.cc
+++ b/paddle/operators/sequence_conv_op.cc
@@ -29,10 +29,6 @@ class SequenceConvOp : public framework::OperatorWithKernel {
                    "Input(Filter) of SequenceConvOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of SequenceConvOp should not be null.");
-    // PaddingData mast be not empty. Otherwise(EnforceNotMet: enforce numel() >
-    // 0 failed, 0 <= 0)
-    PADDLE_ENFORCE(ctx->HasInput("PaddingData"),
-                   "Input(PaddingData) of SequenceConvOp should not be null.");

     int context_length = ctx->Attrs().Get<int>("context_length");
     bool padding_trainable = ctx->Attrs().Get<bool>("padding_trainable");
@@ -48,6 +44,9 @@ class SequenceConvOp : public framework::OperatorWithKernel {
                    "number_of_input_features).");

     if (padding_trainable) {
+      PADDLE_ENFORCE(
+          ctx->HasInput("PaddingData"),
+          "Input(PaddingData) of SequenceConvOp should not be null.");
       framework::DDim padding_dim = ctx->GetInputDim("PaddingData");
       int up_pad = std::max(0, -context_start);
       int down_pad = std::max(0, context_start + context_length - 1);
@@ -106,11 +105,12 @@ class SequenceConvOpMaker : public framework::OpProtoAndCheckerMaker {
              "(A float LoDTensor) the input of SequenceConvOp, a vector of "
              "2-D matrix of size (minibatch, number_of_input_features).");
     AddInput("PaddingData",
-             "(A float LoDTensor) the input of SequenceConvOp, a vector of "
+             "(Tensor) the input of SequenceConvOp, a vector of "
              "2-D matrix of size (up_pad + down_pad, "
-             "number_of_input_features). ");
+             "number_of_input_features). ")
") + .AsDispensable(); AddInput("Filter", - "(A float LoDTensor) the input of SequenceConvOp, a vector of " + "(Tensor) the input of SequenceConvOp, a vector of " "2-D matrix of size (context_length x number_of_input_features)."); AddOutput("Out", "(A float LoDTensor) the output of SequenceConvOp, a vector " diff --git a/paddle/operators/sequence_conv_op.h b/paddle/operators/sequence_conv_op.h index 3525bb752b416..6907c011a04c1 100644 --- a/paddle/operators/sequence_conv_op.h +++ b/paddle/operators/sequence_conv_op.h @@ -36,7 +36,7 @@ class SequenceConvKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* in = context.Input("X"); auto* out = context.Output("Out"); - auto filter = *context.Input("Filter"); + auto filter = *context.Input("Filter"); out->mutable_data(context.GetPlace()); // out->set_lod(in->lod()); @@ -50,9 +50,9 @@ class SequenceConvKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ(in->lod().size(), 1UL, "Only support one level sequence now."); - const LoDTensor* padding_data = nullptr; + const Tensor* padding_data = nullptr; if (padding_trainable) { - padding_data = context.Input("PaddingData"); + padding_data = context.Input("PaddingData"); } int up_pad = std::max(0, -context_start); @@ -63,7 +63,7 @@ class SequenceConvKernel : public framework::OpKernel { // use col_shape in the im2col calculation framework::DDim col_shape = {in->dims()[0], sequence_width * context_length}; - LoDTensor col; + Tensor col; col.mutable_data(col_shape, context.GetPlace()); // Because if padding_trainable is false, padding data should be zeros. auto temp = framework::EigenVector::Flatten(col); @@ -73,7 +73,7 @@ class SequenceConvKernel : public framework::OpKernel { paddle::operators::math::SequenceProjectFunctor seq_project_functor; LoDTensor* input = const_cast(in); - LoDTensor* pad_data = const_cast(padding_data); + Tensor* pad_data = const_cast(padding_data); seq_project_functor(context.device_context(), *input, *pad_data, col, padding_trainable, context_start, context_length, @@ -91,12 +91,11 @@ class SequenceConvGradKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* out_g = context.Input(framework::GradVarName("Out")); auto* in_g = context.Output(framework::GradVarName("X")); - auto* filter_g = - context.Output(framework::GradVarName("Filter")); + auto* filter_g = context.Output(framework::GradVarName("Filter")); auto* padding_data_g = - context.Output(framework::GradVarName("PaddingData")); + context.Output(framework::GradVarName("PaddingData")); auto* in = context.Input("X"); - auto* filter = context.Input("Filter"); + auto* filter = context.Input("Filter"); int context_start = context.Attr("context_start"); int context_length = context.Attr("context_length"); @@ -115,7 +114,7 @@ class SequenceConvGradKernel : public framework::OpKernel { // use col_shape in the im2col calculation framework::DDim col_shape = {in->dims()[0], sequence_width * context_length}; - LoDTensor col; + Tensor col; if (in_g || filter_g || (padding_trainable && padding_data_g)) { col.mutable_data(col_shape, context.GetPlace()); @@ -161,17 +160,17 @@ class SequenceConvGradKernel : public framework::OpKernel { functor(context.device_context(), filter_g, 0); Tensor filter_grad_ = *filter_g; - Tensor out_grad_ = *out_g; + LoDTensor out_grad_ = *out_g; - const LoDTensor* padding_data = nullptr; + const Tensor* padding_data = nullptr; if (padding_trainable) { - padding_data = 
context.Input("PaddingData"); + padding_data = context.Input("PaddingData"); } sequence_width = static_cast(in->dims()[1]); LoDTensor* input = const_cast(in); - LoDTensor* pad_data = const_cast(padding_data); + Tensor* pad_data = const_cast(padding_data); seq_project_functor(context.device_context(), *input, *pad_data, col, padding_trainable, context_start, context_length, diff --git a/python/paddle/v2/framework/tests/test_seq_conv.py b/python/paddle/v2/framework/tests/test_seq_conv.py index 2064c1cb11176..b7b3c0811c809 100644 --- a/python/paddle/v2/framework/tests/test_seq_conv.py +++ b/python/paddle/v2/framework/tests/test_seq_conv.py @@ -20,24 +20,29 @@ def setUp(self): # one level, batch size x = np.random.uniform(0.1, 1, [self.input_size[0], self.input_size[1]]).astype('float32') - - self.begin_pad = np.max([0, -self.context_start]) - self.end_pad = np.max([0, self.context_start + self.context_length - 1]) - self.total_pad = self.begin_pad + self.end_pad - if self.total_pad == 0: - self.total_pad = 1 - - # PaddingData mast be not empty. - # Otherwise(EnforceNotMet: enforce numel() > 0 failed, 0 <= 0) - padding_data = np.random.uniform( - 0.1, 1, [self.total_pad, self.input_size[1]]).astype('float32') w = np.random.uniform( 0.1, 1, [self.context_length, self.input_size[1]]).astype('float32') + + begin_pad = np.max([0, -self.context_start]) + end_pad = np.max([0, self.context_start + self.context_length - 1]) + total_pad = begin_pad + end_pad + padding_data = np.random.uniform( + 0.1, 1, [total_pad, self.input_size[1]]).astype('float32') + self.pad_data = padding_data self.inputs = { 'X': (x, self.lod), - 'PaddingData': (padding_data, [[0, self.total_pad]]), - 'Filter': (w, [[0, self.context_length]]) + 'Filter': w, } + self.inputs_val = ['X', 'Filter'] + self.inputs_val_no_x = ['Filter'] + self.inputs_val_no_f = ['X'] + + if total_pad != 0: + self.inputs['PaddingData'] = padding_data + self.inputs_val = ['X', 'PaddingData', 'Filter'] + self.inputs_val_no_x = ['PaddingData', 'Filter'] + self.inputs_val_no_f = ['PaddingData', 'X'] + self.attrs = { 'context_start': self.context_start, 'context_length': self.context_length, @@ -51,7 +56,7 @@ def setUp(self): def compute(self): x, lod = self.inputs['X'] filter = self.inputs['Filter'] - pading_data, _ = self.inputs['PaddingData'] + pading_data = self.pad_data out = np.zeros((self.input_size[0], self.context_length * self.input_size[1])).astype('float32') lod = lod[0] @@ -90,12 +95,12 @@ def compute(self): out[out_begin:out_end, j * self.input_size[1]:(j + 1) * self.input_size[1]] += in_sub - filter_dim = filter[0].shape + filter_dim = filter.shape output_dim = self.outputs['Out'].shape - filter[0].shape = filter_dim[0] * filter_dim[1] + filter.shape = filter_dim[0] * filter_dim[1] self.outputs['Out'].shape = (output_dim[0], ) - np.dot(out, filter[0], out=self.outputs['Out']) - filter[0].shape = filter_dim + np.dot(out, filter, out=self.outputs['Out']) + filter.shape = filter_dim self.outputs['Out'].shape = output_dim def test_check_output(self): @@ -104,16 +109,14 @@ def test_check_output(self): def test_check_grad(self): if self.padding_trainable: self.check_grad( - set(['X', 'PaddingData', 'Filter']), - 'Out', - max_relative_error=0.05) + set(self.inputs_val), 'Out', max_relative_error=0.05) def test_check_grad_input(self): self.check_grad( ['X'], 'Out', max_relative_error=0.05, - no_grad_set=set(['PaddingData', 'Filter'])) + no_grad_set=set(self.inputs_val_no_x)) def test_check_grad_padding_data(self): if self.padding_trainable: @@ 
@@ -128,19 +131,20 @@ def test_check_grad_Filter(self):
             ['Filter'],
             'Out',
             max_relative_error=0.05,
-            no_grad_set=set(['X', 'PaddingData']))
+            no_grad_set=set(self.inputs_val_no_f))

     def test_check_grad_input_filter(self):
-        self.check_grad(
-            ['X', 'Filter'],
-            'Out',
-            max_relative_error=0.05,
-            no_grad_set=set(['PaddingData']))
+        if self.padding_trainable:
+            self.check_grad(
+                ['X', 'Filter'],
+                'Out',
+                max_relative_error=0.05,
+                no_grad_set=set(['PaddingData']))

     def test_check_grad_padding_input(self):
         if self.padding_trainable:
             self.check_grad(
-                ['X', 'PaddingData'],
+                self.inputs_val_no_f,
                 'Out',
                 max_relative_error=0.05,
                 no_grad_set=set(['Filter']))
@@ -148,7 +152,7 @@ def test_check_grad_padding_input(self):
     def test_check_grad_padding_filter(self):
         if self.padding_trainable:
             self.check_grad(
-                ['PaddingData', 'Filter'],
+                self.inputs_val_no_x,
                 'Out',
                 max_relative_error=0.05,
                 no_grad_set=set(['X']))
@@ -191,69 +195,5 @@ def init_test_case(self):
                        [self.input_size[0]]]


-'''
-class TestSeqProjectCases(TestSeqProject):
-    def setUp(self):
-        self.init_test_case()
-        self.op_type = 'sequence_project'
-
-        num = 0
-        for context_start in [-5, -3, -1, 0, 3]:
-            for context_length in [1, 2, 5, 7]:
-                for batch_size in [1, 2, 5, 7]:
-                    for padding_trainable in [False, True]:
-
-                        if context_length == 1 and context_start == 0 and padding_trainable:
-                            continue
-
-                        self.context_start = context_start
-                        self.context_length = context_length
-                        self.padding_trainable = padding_trainable
-                        self.input_size = [batch_size, 23]
-                        x = np.random.uniform(0.1, 1,
-                                              self.input_size).astype('float32')
-                        self.lod = [[0, self.input_size[0]]]
-                        if self.input_size[0] > 2:
-                            idx = range(self.input_size[0])
-                            del idx[0]
-                            self.lod = [
-                                [0] + np.sort(random.sample(idx, 2)).tolist() +
-                                [self.input_size[0]]
-                            ]
-
-                        self.begin_pad = np.max([0, -self.context_start])
-                        self.end_pad = np.max([0, self.context_start + self.context_length - 1])
-                        self.total_pad = self.begin_pad + self.end_pad
-                        if self.total_pad == 0:
-                            self.total_pad = 1
-                        # PaddingData mast be not empty. Otherwise(EnforceNotMet: enforce numel() > 0 failed, 0 <= 0)
-                        padding_data = np.random.uniform(
-                            0.1, 1, [self.total_pad, self.input_size[1]]).astype('float32')
-
-                        self.inputs = {
-                            'X': (x, self.lod),
-                            'PaddingData': (padding_data, [[0, self.total_pad]])
-                        }
-                        self.attrs = {
-                            'context_start': self.context_start,
-                            'context_length': self.context_length,
-                            'padding_trainable': self.padding_trainable,
-                            'context_stride': self.context_stride
-                        }
-                        out = np.zeros((self.input_size[0], self.input_size[1] *
-                                        self.context_length)).astype('float32')
-                        self.outputs = {'Out': out}
-                        print num
-                        print self.attrs
-                        print batch_size
-                        print padding_trainable
-                        print "$$$$$$$$$$$$$"
-
-                        self.compute()
-                        self.test_check_output()
-
-                        num += 1
-'''
-
 if __name__ == '__main__':
     unittest.main()
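For reference, the computation that the reworked test's compute() checks, in the padding_trainable=False path, can be sketched as a standalone numpy function: context rows that fall outside a sequence stay zero, matching the zero-initialized col tensor in sequence_conv_op.h. The helper name seq_conv_reference and its standalone structure are illustrative only and are not part of this patch.

import numpy as np

def seq_conv_reference(x, lod, filter, context_start, context_length):
    # x: (total_rows, width) input features; lod[0] holds one-level sequence
    # offsets into the rows of x, e.g. [0, 4, total_rows].
    width = x.shape[1]
    col = np.zeros((x.shape[0], context_length * width), dtype=x.dtype)
    offsets = lod[0]
    for seq in range(len(offsets) - 1):
        begin, end = offsets[seq], offsets[seq + 1]
        for i in range(begin, end):
            for j in range(context_length):
                src = i + context_start + j
                if begin <= src < end:  # rows outside the sequence stay zero
                    col[i, j * width:(j + 1) * width] = x[src]
    # filter has shape (context_length, width); the projection is a dot with
    # its flattened form, giving one output value per time step, as in compute().
    return np.dot(col, filter.reshape(-1))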