Merge pull request #375 from lfchener/fix

Change StaticRNN to fluid.layers.rnn
pull/389/head
Yibing Liu 5 years ago committed by GitHub
commit 36825f5d11
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -59,55 +59,62 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride,
return padding_reset return padding_reset
def simple_rnn(input, size, param_attr=None, bias_attr=None, is_reverse=False): class RNNCell(fluid.layers.RNNCell):
'''A simple rnn layer. """A simple rnn cell."""
:param input: input layer.
:type input: Variable def __init__(self,
:param size: Dimension of RNN cells. hidden_size,
:type size: int param_attr=None,
bias_attr=None,
hidden_activation=None,
activation=None,
dtype="float32",
name="RNNCell"):
"""Initialize simple rnn cell.
:param hidden_size: Dimension of RNN cells.
:type hidden_size: int
:param param_attr: Parameter properties of hidden layer weights that :param param_attr: Parameter properties of hidden layer weights that
can be learned can be learned
:type param_attr: ParamAttr :type param_attr: ParamAttr
:param bias_attr: Bias properties of hidden layer weights that can be learned :param bias_attr: Bias properties of hidden layer weights that can be learned
:type bias_attr: ParamAttr :type bias_attr: ParamAttr
:param is_reverse: Whether to calculate the inverse RNN :param hidden_activation: Activation for hidden cell
:type is_reverse: bool :type hidden_activation: Activation
:return: A simple RNN layer. :param activation: Activation for output
:rtype: Variable :type activation: Activation
''' :param name: Name of cell
if is_reverse: :type name: string
input = fluid.layers.sequence_reverse(x=input) """
pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32)) self.hidden_size = hidden_size
input, length = fluid.layers.sequence_pad(input, pad_value) self.param_attr = param_attr
rnn = fluid.layers.StaticRNN() self.bias_attr = bias_attr
input = fluid.layers.transpose(input, [1, 0, 2]) self.hidden_activation = hidden_activation
with rnn.step(): self.activation = activation or fluid.layers.brelu
in_ = rnn.step_input(input) self.name = name
mem = rnn.memory(shape=[-1, size], batch_ref=in_)
out = fluid.layers.fc( def call(self, inputs, states):
input=mem, new_hidden = fluid.layers.fc(
size=size, input=states,
act=None, size=self.hidden_size,
param_attr=param_attr, act=self.hidden_activation,
bias_attr=bias_attr) param_attr=self.param_attr,
out = fluid.layers.elementwise_add(out, in_) bias_attr=self.bias_attr)
out = fluid.layers.brelu(out) new_hidden = fluid.layers.elementwise_add(new_hidden, inputs)
rnn.update_memory(mem, out) new_hidden = self.activation(new_hidden)
rnn.output(out)
return new_hidden, new_hidden
out = rnn()
out = fluid.layers.transpose(out, [1, 0, 2]) @property
out = fluid.layers.sequence_unpad(x=out, length=length) def state_shape(self):
return [self.hidden_size]
if is_reverse:
out = fluid.layers.sequence_reverse(x=out)
return out
def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights): def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
"""Bidirectional simple rnn layer with sequence-wise batch normalization. """Bidirectional simple rnn layer with sequence-wise batch normalization.
The batch normalization is only performed on input-state weights. The batch normalization is only performed on input-state weights.
:param name: Name of the layer parameters. :param name: Name of the layer parameters.
:type name: string :type name: string
:param input: Input layer. :param input: Input layer.
@ -120,6 +127,20 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
:return: Bidirectional simple rnn layer. :return: Bidirectional simple rnn layer.
:rtype: Variable :rtype: Variable
""" """
forward_cell = RNNCell(
hidden_size=size,
activation=fluid.layers.brelu,
param_attr=fluid.ParamAttr(name=name + '_forward_rnn_weight'),
bias_attr=fluid.ParamAttr(name=name + '_forward_rnn_bias'))
reverse_cell = RNNCell(
hidden_size=size,
activation=fluid.layers.brelu,
param_attr=fluid.ParamAttr(name=name + '_reverse_rnn_weight'),
bias_attr=fluid.ParamAttr(name=name + '_reverse_rnn_bias'))
pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32))
if share_weights: if share_weights:
#input-hidden weights shared between bi-directional rnn. #input-hidden weights shared between bi-directional rnn.
input_proj = fluid.layers.fc( input_proj = fluid.layers.fc(
@ -130,28 +151,14 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
bias_attr=False) bias_attr=False)
# batch norm is only performed on input-state projection # batch norm is only performed on input-state projection
input_proj_bn = fluid.layers.batch_norm( input_proj_bn_forward = fluid.layers.batch_norm(
input=input_proj, input=input_proj,
act=None, act=None,
param_attr=fluid.ParamAttr(name=name + '_batch_norm_weight'), param_attr=fluid.ParamAttr(name=name + '_batch_norm_weight'),
bias_attr=fluid.ParamAttr(name=name + '_batch_norm_bias'), bias_attr=fluid.ParamAttr(name=name + '_batch_norm_bias'),
moving_mean_name=name + '_batch_norm_moving_mean', moving_mean_name=name + '_batch_norm_moving_mean',
moving_variance_name=name + '_batch_norm_moving_variance') moving_variance_name=name + '_batch_norm_moving_variance')
#forward and backword in time input_proj_bn_reverse = input_proj_bn_forward
forward_rnn = simple_rnn(
input=input_proj_bn,
size=size,
param_attr=fluid.ParamAttr(name=name + '_forward_rnn_weight'),
bias_attr=fluid.ParamAttr(name=name + '_forward_rnn_bias'),
is_reverse=False)
reverse_rnn = simple_rnn(
input=input_proj_bn,
size=size,
param_attr=fluid.ParamAttr(name=name + '_reverse_rnn_weight'),
bias_attr=fluid.ParamAttr(name=name + '_reverse_rnn_bias'),
is_reverse=True)
else: else:
input_proj_forward = fluid.layers.fc( input_proj_forward = fluid.layers.fc(
input=input, input=input,
@ -159,7 +166,7 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
act=None, act=None,
param_attr=fluid.ParamAttr(name=name + '_forward_fc_weight'), param_attr=fluid.ParamAttr(name=name + '_forward_fc_weight'),
bias_attr=False) bias_attr=False)
input_proj_backward = fluid.layers.fc( input_proj_reverse = fluid.layers.fc(
input=input, input=input,
size=size, size=size,
act=None, act=None,
@ -174,8 +181,8 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
bias_attr=fluid.ParamAttr(name=name + '_forward_batch_norm_bias'), bias_attr=fluid.ParamAttr(name=name + '_forward_batch_norm_bias'),
moving_mean_name=name + '_forward_batch_norm_moving_mean', moving_mean_name=name + '_forward_batch_norm_moving_mean',
moving_variance_name=name + '_forward_batch_norm_moving_variance') moving_variance_name=name + '_forward_batch_norm_moving_variance')
input_proj_bn_backward = fluid.layers.batch_norm( input_proj_bn_reverse = fluid.layers.batch_norm(
input=input_proj_backward, input=input_proj_reverse,
act=None, act=None,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name=name + '_reverse_batch_norm_weight'), name=name + '_reverse_batch_norm_weight'),
@ -183,18 +190,20 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
moving_mean_name=name + '_reverse_batch_norm_moving_mean', moving_mean_name=name + '_reverse_batch_norm_moving_mean',
moving_variance_name=name + '_reverse_batch_norm_moving_variance') moving_variance_name=name + '_reverse_batch_norm_moving_variance')
# forward and backward in time # forward and backward in time
forward_rnn = simple_rnn( input, length = fluid.layers.sequence_pad(input_proj_bn_forward, pad_value)
input=input_proj_bn_forward, forward_rnn, _ = fluid.layers.rnn(
size=size, cell=forward_cell, inputs=input, time_major=False, is_reverse=False)
param_attr=fluid.ParamAttr(name=name + '_forward_rnn_weight'), forward_rnn = fluid.layers.sequence_unpad(x=forward_rnn, length=length)
bias_attr=fluid.ParamAttr(name=name + '_forward_rnn_bias'),
is_reverse=False) input, length = fluid.layers.sequence_pad(input_proj_bn_reverse, pad_value)
reverse_rnn = simple_rnn( reverse_rnn, _ = fluid.layers.rnn(
input=input_proj_bn_backward, cell=reverse_cell,
size=size, inputs=input,
param_attr=fluid.ParamAttr(name=name + '_reverse_rnn_weight'), sequence_length=length,
bias_attr=fluid.ParamAttr(name=name + '_reverse_rnn_bias'), time_major=False,
is_reverse=True) is_reverse=True)
reverse_rnn = fluid.layers.sequence_unpad(x=reverse_rnn, length=length)
out = fluid.layers.concat(input=[forward_rnn, reverse_rnn], axis=1) out = fluid.layers.concat(input=[forward_rnn, reverse_rnn], axis=1)
return out return out
@ -202,6 +211,7 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
def bidirectional_gru_bn_layer(name, input, size, act): def bidirectional_gru_bn_layer(name, input, size, act):
"""Bidirectional gru layer with sequence-wise batch normalization. """Bidirectional gru layer with sequence-wise batch normalization.
The batch normalization is only performed on input-state weights. The batch normalization is only performed on input-state weights.
:param name: Name of the layer. :param name: Name of the layer.
:type name: string :type name: string
:param input: Input layer. :param input: Input layer.
@ -219,7 +229,7 @@ def bidirectional_gru_bn_layer(name, input, size, act):
act=None, act=None,
param_attr=fluid.ParamAttr(name=name + '_forward_fc_weight'), param_attr=fluid.ParamAttr(name=name + '_forward_fc_weight'),
bias_attr=False) bias_attr=False)
input_proj_backward = fluid.layers.fc( input_proj_reverse = fluid.layers.fc(
input=input, input=input,
size=size * 3, size=size * 3,
act=None, act=None,
@ -233,8 +243,8 @@ def bidirectional_gru_bn_layer(name, input, size, act):
bias_attr=fluid.ParamAttr(name=name + '_forward_batch_norm_bias'), bias_attr=fluid.ParamAttr(name=name + '_forward_batch_norm_bias'),
moving_mean_name=name + '_forward_batch_norm_moving_mean', moving_mean_name=name + '_forward_batch_norm_moving_mean',
moving_variance_name=name + '_forward_batch_norm_moving_variance') moving_variance_name=name + '_forward_batch_norm_moving_variance')
input_proj_bn_backward = fluid.layers.batch_norm( input_proj_bn_reverse = fluid.layers.batch_norm(
input=input_proj_backward, input=input_proj_reverse,
act=None, act=None,
param_attr=fluid.ParamAttr(name=name + '_reverse_batch_norm_weight'), param_attr=fluid.ParamAttr(name=name + '_reverse_batch_norm_weight'),
bias_attr=fluid.ParamAttr(name=name + '_reverse_batch_norm_bias'), bias_attr=fluid.ParamAttr(name=name + '_reverse_batch_norm_bias'),
@ -250,7 +260,7 @@ def bidirectional_gru_bn_layer(name, input, size, act):
bias_attr=fluid.ParamAttr(name=name + '_forward_gru_bias'), bias_attr=fluid.ParamAttr(name=name + '_forward_gru_bias'),
is_reverse=False) is_reverse=False)
reverse_gru = fluid.layers.dynamic_gru( reverse_gru = fluid.layers.dynamic_gru(
input=input_proj_bn_backward, input=input_proj_bn_reverse,
size=size, size=size,
gate_activation='sigmoid', gate_activation='sigmoid',
candidate_activation=act, candidate_activation=act,
@ -262,6 +272,7 @@ def bidirectional_gru_bn_layer(name, input, size, act):
def conv_group(input, num_stacks, seq_len_data, masks): def conv_group(input, num_stacks, seq_len_data, masks):
"""Convolution group with stacked convolution layers. """Convolution group with stacked convolution layers.
:param input: Input layer. :param input: Input layer.
:type input: Variable :type input: Variable
:param num_stacks: Number of stacked convolution layers. :param num_stacks: Number of stacked convolution layers.
@ -315,6 +326,7 @@ def conv_group(input, num_stacks, seq_len_data, masks):
def rnn_group(input, size, num_stacks, num_conv_layers, use_gru, def rnn_group(input, size, num_stacks, num_conv_layers, use_gru,
share_rnn_weights): share_rnn_weights):
"""RNN group with stacked bidirectional simple RNN or GRU layers. """RNN group with stacked bidirectional simple RNN or GRU layers.
:param input: Input layer. :param input: Input layer.
:type input: Variable :type input: Variable
:param size: Dimension of RNN cells in each layer. :param size: Dimension of RNN cells in each layer.
@ -359,6 +371,7 @@ def deep_speech_v2_network(audio_data,
use_gru=False, use_gru=False,
share_rnn_weights=True): share_rnn_weights=True):
"""The DeepSpeech2 network structure. """The DeepSpeech2 network structure.
:param audio_data: Audio spectrogram data layer. :param audio_data: Audio spectrogram data layer.
:type audio_data: Variable :type audio_data: Variable
:param text_data: Transcription text data layer. :param text_data: Transcription text data layer.

Loading…
Cancel
Save