Add unified RNN APIs #26588

Merged: 23 commits, Aug 27, 2020
288 changes: 223 additions & 65 deletions python/paddle/fluid/layers/rnn.py
@@ -38,6 +38,7 @@
'Decoder',
'BeamSearchDecoder',
'rnn',
'birnn',
'dynamic_decode',
'DecodeHelper',
'TrainingHelper',
@@ -438,92 +439,157 @@ def rnn(cell,
is_reverse=False,
**kwargs):
"""
:api_attr: Static Graph

rnn creates a recurrent neural network specified by RNNCell `cell`,
which performs :code:`cell.call()` repeatedly until it reaches the maximum
length of `inputs`.

Parameters:
cell(RNNCell): An instance of `RNNCell`.
inputs(Variable): A (possibly nested structure of) tensor variable[s].
The shape of tensor should be `[batch_size, sequence_length, ...]`
for `time_major == False` or `[sequence_length, batch_size, ...]`
for `time_major == True`. It represents the inputs to be unrolled
in RNN.
initial_states(Variable, optional): A (possibly nested structure of)
tensor variable[s], representing the initial state for RNN.
If not provided, `cell.get_initial_states` would be used to produce
the initial state. Default None.
sequence_length(Variable, optional): A tensor with shape `[batch_size]`.
It stores the real length of each instance, which makes it possible to
extract the last valid state correctly once a batch element's sequence
length has been exceeded. If not provided, paddings are treated the
same as non-padding inputs. Default None.
time_major(bool, optional): Indicate the data layout of Tensor included
in `input` and `output` tensors. If `False`, the data layout would
be batch major with shape `[batch_size, sequence_length, ...]`. If
`True`, the data layout would be time major with shape
`[sequence_length, batch_size, ...]`. Default: `False`.
is_reverse(bool, optional): Indicate whether to calculate in the reverse
order of input sequences. Default: `False`.
**kwargs: Additional keyword arguments. Arguments passed to `cell.call`.
which performs :code:`cell.call()` (for dygraph mode :code:`cell.forward`)
repeatedly until it reaches the maximum length of `inputs`.

Arguments:
cell(RNNCellBase): An instance of `RNNCellBase`.
inputs(Tensor): the input sequences.
If time_major is True, the shape is
`[time_steps, batch_size, input_size]`
else the shape is `[batch_size, time_steps, input_size]`.
initial_states(Tensor|tuple|list, optional): the initial state of the
rnn cell. Tensor or a possibly nested structure of tensors. If not
provided, `cell.get_initial_states` would be called to produce
the initial state. Defaults to None.
sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
or int32. The valid lengths of input sequences. Defaults to None.
If `sequence_length` is not None, the inputs are treated as
padded sequences. In each input sequence, elements whose time step
index is not less than the valid length are treated as padding.
time_major (bool): Whether the first dimension of the input represents
the time steps. Defaults to False.
is_reverse (bool, optional): Indicate whether to calculate in the reverse
order of input sequences. Defaults to False.
**kwargs: Additional keyword arguments to pass to `forward` of the cell.

Returns:
tuple: A tuple( :code:`(final_outputs, final_states)` ) including the final \
outputs and states, both are Tensor or nested structure of Tensor. \
`final_outputs` has the same structure and data types as \
the returned `outputs` of :code:`cell.call`, and each Tensor in `final_outputs` \
stacks all time steps' counterpart in `outputs` thus has shape `[batch_size, sequence_length, ...]` \
for `time_major == False` or `[sequence_length, batch_size, ...]` for `time_major == True`. \
`final_states` is the counterpart at last time step of initial states, \
thus has the same structure with it and has tensors with same shapes \
and data types.
(outputs, final_states)
outputs (Tensor|list|tuple): the output sequence. Tensor or nested
structure of Tensors.
If `time_major` is True, the shape of each tensor in outputs is
`[time_steps, batch_size, hidden_size]`, else
`[batch_size, time_steps, hidden_size]`.
final_states (Tensor|list|tuple): final states. A (possibly nested structure of)
tensor[s], representing the final state of the RNN. It has the same
structure as the initial states, and each tensor in the final states has
the same shape and dtype as the corresponding tensor in the initial states.


Examples:

.. code-block:: python

import paddle.fluid as fluid

inputs = fluid.data(name="inputs",
shape=[-1, 32, 128],
dtype="float32")
cell = fluid.layers.GRUCell(hidden_size=128)
outputs = fluid.layers.rnn(cell=cell, inputs=inputs)
import paddle
paddle.disable_static()

cell = paddle.nn.SimpleRNNCell(16, 32)

inputs = paddle.rand((4, 23, 16))
prev_h = paddle.randn((4, 32))
outputs, final_states = paddle.nn.functional.rnn(cell, inputs, prev_h)
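# outputs shape: [4, 23, 32] (batch_size, time_steps, hidden_size)
# final_states shape: [4, 32] (batch_size, hidden_size)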

"""
if in_dygraph_mode():
return _rnn_dynamic_graph(cell, inputs, initial_states, sequence_length,
time_major, is_reverse, **kwargs)
else:
return _rnn_static_graph(cell, inputs, initial_states, sequence_length,
time_major, is_reverse, **kwargs)


class ArrayWrapper(object):
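# accumulates one tensor per time step for a single output leaf, so that
# map_structure can treat the growing list as a single structure element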
def __init__(self, x):
self.array = [x]

def append(self, x):
self.array.append(x)
return self


def _maybe_copy(state, new_state, step_mask):
"""update rnn state or just pass the old state through"""
new_state = nn.elementwise_mul(new_state, step_mask, axis=0) \
+ nn.elementwise_mul(state, (1 - step_mask), axis=0)
return new_state


def _transpose_batch_time(x):
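# swap axes 0 and 1: [batch, time, ...] <-> [time, batch, ...]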
perm = [1, 0] + list(range(2, len(x.shape)))
return nn.transpose(x, perm)


def _rnn_dynamic_graph(cell,
inputs,
initial_states=None,
sequence_length=None,
time_major=False,
is_reverse=False,
**kwargs):
time_step_index = 0 if time_major else 1
flat_inputs = flatten(inputs)
time_steps = flat_inputs[0].shape[time_step_index]

if not time_major:
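# convert to time-major internally so that step i can be sliced as x[i]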
inputs = map_structure(_transpose_batch_time, inputs)

if sequence_length is not None:
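# sequence_mask yields a [batch_size, time_steps] mask of valid steps;
# transpose it to [time_steps, batch_size] so mask[i] matches step i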
mask = sequence_lod.sequence_mask(
sequence_length, maxlen=time_steps, dtype=inputs.dtype)
mask = nn.transpose(mask, [1, 0])

if is_reverse:
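# reverse along the time axis; outputs are reversed back after the loop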
inputs = map_structure(lambda x: tensor.reverse(x, axis=[0]), inputs)
mask = tensor.reverse(mask, axis=[0]) \
if sequence_length is not None else None

states = initial_states
outputs = []
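# unroll the cell one time step at a time, carrying states forward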
for i in range(time_steps):
step_inputs = map_structure(lambda x: x[i], inputs)
step_outputs, new_states = cell(step_inputs, states, **kwargs)
if sequence_length is not None:
new_states = map_structure(
partial(
_maybe_copy, step_mask=mask[i]), states, new_states)
states = new_states
outputs = map_structure(lambda x: ArrayWrapper(x),
step_outputs) if i == 0 else map_structure(
lambda x, x_array: x_array.append(x),
step_outputs, outputs)
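# after the loop, stack the per-step outputs along the time axis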

final_outputs = map_structure(
lambda x: nn.stack(x.array, axis=time_step_index),
outputs)

if is_reverse:
final_outputs = map_structure(
lambda x: tensor.reverse(x, axis=time_step_index),
final_outputs)

final_states = new_states
return final_outputs, final_states


def _rnn_static_graph(cell,
inputs,
initial_states=None,
sequence_length=None,
time_major=False,
is_reverse=False,
**kwargs):
check_type(inputs, 'inputs', (Variable, list, tuple), 'rnn')
if isinstance(inputs, (list, tuple)):
for i, input_x in enumerate(inputs):
check_variable_and_dtype(input_x, 'inputs[' + str(i) + ']',
['float32', 'float64'], 'rnn')
check_type(initial_states, 'initial_states',
(Variable, list, tuple, type(None)), 'rnn')
if isinstance(initial_states, (list, tuple)):
states = map_structure(lambda x: x, initial_states)[0]
for i, state in enumerate(states):
if isinstance(state, (list, tuple)):
for j, state_j in enumerate(state):
check_variable_and_dtype(state_j, 'state_j[' + str(j) + ']',
['float32', 'float64'], 'rnn')
else:
check_variable_and_dtype(state, 'states[' + str(i) + ']',
['float32', 'float64'], 'rnn')

check_type(sequence_length, 'sequence_length', (Variable, type(None)),
'rnn')

def _maybe_copy(state, new_state, step_mask):
# TODO: use where_op
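# equivalent to: step_mask * new_state + (1 - step_mask) * state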
new_state = nn.elementwise_mul(
new_state, step_mask, axis=0) - nn.elementwise_mul(
state, (step_mask - 1), axis=0)
return new_state

def _transpose_batch_time(x):
return nn.transpose(x, [1, 0] + list(range(2, len(x.shape))))

def _switch_grad(x, stop=False):
x.stop_gradient = stop
return x
@@ -582,6 +648,98 @@ def _switch_grad(x, stop=False):
return (final_outputs, final_states)


def birnn(cell_fw,
cell_bw,
inputs,
initial_states=None,
sequence_length=None,
time_major=False,
**kwargs):
"""
birnn creates a bidirectional recurrent neural network specified by
RNNCell `cell_fw` and `cell_bw`, which performs :code:`cell.call()`
(for dygraph mode :code:`cell.forward`) repeatedly until it reaches the
maximum length of `inputs`, and then concatenates the outputs of the two
RNNs along the last axis.

Arguments:
cell_fw(RNNCellBase): An instance of `RNNCellBase`.
cell_bw(RNNCellBase): An instance of `RNNCellBase`.
inputs(Tensor): the input sequences.
If time_major is True, the shape is
`[time_steps, batch_size, input_size]`
else the shape is `[batch_size, time_steps, input_size]`.
initial_states(tuple, optional): A tuple of initial states of
`cell_fw` and `cell_bw`.
If not provided, `cell.get_initial_states` would be called to
produce initial state for each cell. Defaults to None.
sequence_length (Tensor, optional): shape `[batch_size]`, dtype: int64
or int32. The valid lengths of input sequences. Defaults to None.
If `sequence_length` is not None, the inputs are treated as
padded sequences. In each input sequence, elements whose time step
index is not less than the valid length are treated as padding.
time_major (bool): Whether the first dimension of the input represents
the time steps. Defaults to False.
**kwargs: Additional keyword arguments to pass to `forward` of each cell.

Returns:
(outputs, final_states)
outputs (Tensor): the outputs of the bidirectional RNN. It is the
concatenation of the outputs from the forward RNN and backward
RNN along the last axis.
If time major is True, the shape is `[time_steps, batch_size, size]`,
else the shape is `[batch_size, time_steps, size]`, where size is
`cell_fw.hidden_size + cell_bw.hidden_size`.
final_states (tuple): A tuple of the final states of the forward
cell and backward cell.

Examples:

.. code-block:: python

import paddle
paddle.disable_static()

cell_fw = paddle.nn.LSTMCell(16, 32)
cell_bw = paddle.nn.LSTMCell(16, 32)

inputs = paddle.rand((4, 23, 16))
hf, cf = paddle.rand((4, 32)), paddle.rand((4, 32))
hb, cb = paddle.rand((4, 32)), paddle.rand((4, 32))
initial_states = ((hf, cf), (hb, cb))
outputs, final_states = paddle.nn.functional.birnn(
cell_fw, cell_bw, inputs, initial_states)
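# outputs shape: [4, 23, 64], the forward and backward outputs (each of
# size 32) concatenated along the last axis
# final_states: ((h_fw, c_fw), (h_bw, c_bw)), each tensor of shape [4, 32]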

Review discussion:

Contributor: 1. Example inputs should generally not be randomly
generated; a concrete example is preferable. 2. Please add comments
showing the concrete outputs.

Author: Even without random generation, the outputs cannot be
guaranteed.

Author: So for anything that involves parameterized Layers, we do not
write concrete inputs and outputs at all.
"""
if initial_states is None:
states_fw = cell_fw.get_initial_states(
batch_ref=inputs, batch_dim_idx=1 if time_major else 0)
states_bw = cell_bw.get_initial_states(
batch_ref=inputs, batch_dim_idx=1 if time_major else 0)
else:
states_fw, states_bw = initial_states
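# forward pass over the inputs in their original order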
outputs_fw, states_fw = rnn(cell_fw,
inputs,
states_fw,
sequence_length,
time_major=time_major,
**kwargs)
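# backward pass: with is_reverse=True, rnn() reads the sequence in reverse
# and restores the original time order of its outputs, so they align
# step-for-step with the forward outputs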

outputs_bw, states_bw = rnn(cell_bw,
inputs,
states_bw,
sequence_length,
time_major=time_major,
is_reverse=True,
**kwargs)

outputs = map_structure(lambda x, y: tensor.concat([x, y], -1), outputs_fw,
outputs_bw)

final_states = (states_fw, states_bw)
return outputs, final_states


class Decoder(object):
"""
:api_attr: Static Graph
1 change: 1 addition & 0 deletions python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -542,6 +542,7 @@ endif()

add_subdirectory(sequence)
add_subdirectory(dygraph_to_static)
add_subdirectory(rnn)

if (WITH_MKLDNN)
add_subdirectory(mkldnn)
6 changes: 6 additions & 0 deletions python/paddle/fluid/tests/unittests/rnn/CMakeLists.txt
@@ -0,0 +1,6 @@
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")

foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP})
endforeach(TEST_OP)
13 changes: 13 additions & 0 deletions python/paddle/fluid/tests/unittests/rnn/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.