From b86bff118e2888a5372b010b5f074256ab39dc9d Mon Sep 17 00:00:00 2001
From: lfchener
Date: Mon, 28 Oct 2019 11:56:49 +0000
Subject: [PATCH 1/3] Change StaticRNN to fluid.layers.rnn

---
 model_utils/network.py | 143 +++++++++++++++++++++++++----------------
 1 file changed, 86 insertions(+), 57 deletions(-)

diff --git a/model_utils/network.py b/model_utils/network.py
index 3a4f1dc3..dbbf75e7 100644
--- a/model_utils/network.py
+++ b/model_utils/network.py
@@ -59,50 +59,53 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride,
     return padding_reset
 
 
-def simple_rnn(input, size, param_attr=None, bias_attr=None, is_reverse=False):
-    '''A simple rnn layer.
-    :param input: input layer.
-    :type input: Variable
-    :param size: Dimension of RNN cells.
-    :type size: int
+class RNNCell(fluid.layers.RNNCell):
+    '''A simple rnn cell.
+    :param hidden_size: Dimension of RNN cells.
+    :type hidden_size: int
     :param param_attr: Parameter properties of hidden layer weights that
         can be learned
     :type param_attr: ParamAttr
     :param bias_attr: Bias properties of hidden layer weights that can be learned
     :type bias_attr: ParamAttr
-    :param is_reverse: Whether to calculate the inverse RNN
-    :type is_reverse: bool
-    :return: A simple RNN layer.
-    :rtype: Variable
+    :param hidden_activation: Activation for hidden cell
+    :type hidden_activation: Activation
+    :param activation: Activation for output
+    :type activation: Activation
+    :param name: Name of cell
+    :type name: string
     '''
-    if is_reverse:
-        input = fluid.layers.sequence_reverse(x=input)
-
-    pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32))
-    input, length = fluid.layers.sequence_pad(input, pad_value)
-    rnn = fluid.layers.StaticRNN()
-    input = fluid.layers.transpose(input, [1, 0, 2])
-    with rnn.step():
-        in_ = rnn.step_input(input)
-        mem = rnn.memory(shape=[-1, size], batch_ref=in_)
-        out = fluid.layers.fc(
-            input=mem,
-            size=size,
-            act=None,
-            param_attr=param_attr,
-            bias_attr=bias_attr)
-        out = fluid.layers.elementwise_add(out, in_)
-        out = fluid.layers.brelu(out)
-        rnn.update_memory(mem, out)
-        rnn.output(out)
-
-    out = rnn()
-    out = fluid.layers.transpose(out, [1, 0, 2])
-    out = fluid.layers.sequence_unpad(x=out, length=length)
-
-    if is_reverse:
-        out = fluid.layers.sequence_reverse(x=out)
-    return out
+
+    def __init__(self,
+                 hidden_size,
+                 param_attr=None,
+                 bias_attr=None,
+                 hidden_activation=None,
+                 activation=None,
+                 dtype="float32",
+                 name="RNNCell"):
+        self.hidden_size = hidden_size
+        self.param_attr = param_attr
+        self.bias_attr = bias_attr
+        self.hidden_activation = hidden_activation
+        self.activation = activation or fluid.layers.brelu
+        self.name = name
+
+    def call(self, inputs, states):
+        new_hidden = fluid.layers.fc(
+            input=states,
+            size=self.hidden_size,
+            act=self.hidden_activation,
+            param_attr=self.param_attr,
+            bias_attr=self.bias_attr)
+        new_hidden = fluid.layers.elementwise_add(new_hidden, inputs)
+        new_hidden = self.activation(new_hidden)
+
+        return new_hidden, new_hidden
+
+    @property
+    def state_shape(self):
+        return [self.hidden_size]
 
 
 def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
@@ -137,20 +140,32 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
         bias_attr=fluid.ParamAttr(name=name + '_batch_norm_bias'),
         moving_mean_name=name + '_batch_norm_moving_mean',
         moving_variance_name=name + '_batch_norm_moving_variance')
-    #forward and backword in time
-    forward_rnn = simple_rnn(
-        input=input_proj_bn,
-        size=size,
+    #forward and backward in time
+    forward_cell = RNNCell(
+        hidden_size=size,
+        activation=fluid.layers.brelu,
         param_attr=fluid.ParamAttr(name=name + '_forward_rnn_weight'),
-        bias_attr=fluid.ParamAttr(name=name + '_forward_rnn_bias'),
-        is_reverse=False)
+        bias_attr=fluid.ParamAttr(name=name + '_forward_rnn_bias'))
 
-    reverse_rnn = simple_rnn(
-        input=input_proj_bn,
-        size=size,
+    pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32))
+    input, length = fluid.layers.sequence_pad(input_proj_bn, pad_value)
+    forward_rnn, _ = fluid.layers.rnn(
+        cell=forward_cell, inputs=input, time_major=False, is_reverse=False)
+    forward_rnn = fluid.layers.sequence_unpad(x=forward_rnn, length=length)
+
+    reverse_cell = RNNCell(
+        hidden_size=size,
+        activation=fluid.layers.brelu,
         param_attr=fluid.ParamAttr(name=name + '_reverse_rnn_weight'),
-        bias_attr=fluid.ParamAttr(name=name + '_reverse_rnn_bias'),
+        bias_attr=fluid.ParamAttr(name=name + '_reverse_rnn_bias'))
+    input, length = fluid.layers.sequence_pad(input_proj_bn, pad_value)
+    reverse_rnn, _ = fluid.layers.rnn(
+        cell=reverse_cell,
+        inputs=input,
+        sequence_length=length,
+        time_major=False,
         is_reverse=True)
+    reverse_rnn = fluid.layers.sequence_unpad(x=reverse_rnn, length=length)
     else:
         input_proj_forward = fluid.layers.fc(
@@ -183,18 +198,32 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
         moving_mean_name=name + '_reverse_batch_norm_moving_mean',
         moving_variance_name=name + '_reverse_batch_norm_moving_variance')
     # forward and backward in time
-    forward_rnn = simple_rnn(
-        input=input_proj_bn_forward,
-        size=size,
+    forward_cell = RNNCell(
+        hidden_size=size,
+        activation=fluid.layers.brelu,
         param_attr=fluid.ParamAttr(name=name + '_forward_rnn_weight'),
-        bias_attr=fluid.ParamAttr(name=name + '_forward_rnn_bias'),
-        is_reverse=False)
-    reverse_rnn = simple_rnn(
-        input=input_proj_bn_backward,
-        size=size,
+        bias_attr=fluid.ParamAttr(name=name + '_forward_rnn_bias'))
+
+    pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32))
+    input, length = fluid.layers.sequence_pad(input_proj_bn, pad_value)
+    forward_rnn, _ = fluid.layers.rnn(
+        cell=forward_cell, inputs=input, time_major=False, is_reverse=False)
+    forward_rnn = fluid.layers.sequence_unpad(x=forward_rnn, length=length)
+
+    reverse_cell = RNNCell(
+        hidden_size=size,
+        activation=fluid.layers.brelu,
         param_attr=fluid.ParamAttr(name=name + '_reverse_rnn_weight'),
-        bias_attr=fluid.ParamAttr(name=name + '_reverse_rnn_bias'),
+        bias_attr=fluid.ParamAttr(name=name + '_reverse_rnn_bias'))
+    input, length = fluid.layers.sequence_pad(input_proj_bn, pad_value)
+    reverse_rnn, _ = fluid.layers.rnn(
+        cell=reverse_cell,
+        inputs=input,
+        sequence_length=length,
+        time_major=False,
         is_reverse=True)
+    reverse_rnn = fluid.layers.sequence_unpad(x=reverse_rnn, length=length)
+
     out = fluid.layers.concat(input=[forward_rnn, reverse_rnn], axis=1)
     return out
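
The core of this first patch is the RNNCell contract: call(inputs, states) returns
(step_output, new_states), and fluid.layers.rnn drives the time loop that
StaticRNN used to spell out by hand. A minimal sketch of that contract, assuming
PaddlePaddle 1.6+ (where fluid.layers.RNNCell and fluid.layers.rnn exist); the
cell and variable names below are illustrative, not part of the patch:

import numpy as np
import paddle.fluid as fluid

class MinimalCell(fluid.layers.RNNCell):
    # Per step: h_t = brelu(W * h_{t-1} + b + x_t), mirroring the cell above.
    def __init__(self, hidden_size):
        self.hidden_size = hidden_size

    def call(self, inputs, states):
        new_hidden = fluid.layers.fc(input=states, size=self.hidden_size)
        new_hidden = fluid.layers.elementwise_add(new_hidden, inputs)
        new_hidden = fluid.layers.brelu(new_hidden)
        # The cell returns (step_output, new_states); here they coincide.
        return new_hidden, new_hidden

    @property
    def state_shape(self):
        # Per-example state shape; the batch dimension is prepended for us.
        return [self.hidden_size]

# fluid.layers.rnn unrolls the cell over the time axis of a dense batch.
x = fluid.data(name='x', shape=[None, 20, 32], dtype='float32')
outputs, final_state = fluid.layers.rnn(
    cell=MinimalCell(32), inputs=x, time_major=False)
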
From 8172681b556c96eb40f822a14fba342d026197fd Mon Sep 17 00:00:00 2001
From: lfchener
Date: Tue, 29 Oct 2019 06:50:28 +0000
Subject: [PATCH 2/3] Change StaticRNN to fluid.layers.rnn.

---
 model_utils/network.py | 90 +++++++++++++++++++-----------------------
 1 file changed, 41 insertions(+), 49 deletions(-)

diff --git a/model_utils/network.py b/model_utils/network.py
index dbbf75e7..0749dc5d 100644
--- a/model_utils/network.py
+++ b/model_utils/network.py
@@ -60,22 +60,6 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride,
 
 
 class RNNCell(fluid.layers.RNNCell):
-    '''A simple rnn cell.
-    :param hidden_size: Dimension of RNN cells.
-    :type hidden_size: int
-    :param param_attr: Parameter properties of hidden layer weights that
-        can be learned
-    :type param_attr: ParamAttr
-    :param bias_attr: Bias properties of hidden layer weights that can be learned
-    :type bias_attr: ParamAttr
-    :param hidden_activation: Activation for hidden cell
-    :type hidden_activation: Activation
-    :param activation: Activation for output
-    :type activation: Activation
-    :param name: Name of cell
-    :type name: string
-    '''
-
     def __init__(self,
                  hidden_size,
                  param_attr=None,
@@ -84,6 +68,22 @@ class RNNCell(fluid.layers.RNNCell):
                  activation=None,
                  dtype="float32",
                  name="RNNCell"):
+        '''A simple rnn cell.
+        :param hidden_size: Dimension of RNN cells.
+        :type hidden_size: int
+        :param param_attr: Parameter properties of hidden layer weights that
+            can be learned
+        :type param_attr: ParamAttr
+        :param bias_attr: Bias properties of hidden layer weights that can be learned
+        :type bias_attr: ParamAttr
+        :param hidden_activation: Activation for hidden cell
+        :type hidden_activation: Activation
+        :param activation: Activation for output
+        :type activation: Activation
+        :param name: Name of cell
+        :type name: string
+        '''
+
         self.hidden_size = hidden_size
         self.param_attr = param_attr
         self.bias_attr = bias_attr
@@ -123,6 +123,20 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
     :return: Bidirectional simple rnn layer.
     :rtype: Variable
     """
+    forward_cell = RNNCell(
+        hidden_size=size,
+        activation=fluid.layers.brelu,
+        param_attr=fluid.ParamAttr(name=name + '_forward_rnn_weight'),
+        bias_attr=fluid.ParamAttr(name=name + '_forward_rnn_bias'))
+
+    reverse_cell = RNNCell(
+        hidden_size=size,
+        activation=fluid.layers.brelu,
+        param_attr=fluid.ParamAttr(name=name + '_reverse_rnn_weight'),
+        bias_attr=fluid.ParamAttr(name=name + '_reverse_rnn_bias'))
+
+    pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32))
+
     if share_weights:
         #input-hidden weights shared between bi-directional rnn.
         input_proj = fluid.layers.fc(
@@ -141,24 +155,12 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
         moving_mean_name=name + '_batch_norm_moving_mean',
         moving_variance_name=name + '_batch_norm_moving_variance')
     #forward and backward in time
-    forward_cell = RNNCell(
-        hidden_size=size,
-        activation=fluid.layers.brelu,
-        param_attr=fluid.ParamAttr(name=name + '_forward_rnn_weight'),
-        bias_attr=fluid.ParamAttr(name=name + '_forward_rnn_bias'))
 
-    pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32))
     input, length = fluid.layers.sequence_pad(input_proj_bn, pad_value)
     forward_rnn, _ = fluid.layers.rnn(
         cell=forward_cell, inputs=input, time_major=False, is_reverse=False)
     forward_rnn = fluid.layers.sequence_unpad(x=forward_rnn, length=length)
 
-    reverse_cell = RNNCell(
-        hidden_size=size,
-        activation=fluid.layers.brelu,
-        param_attr=fluid.ParamAttr(name=name + '_reverse_rnn_weight'),
-        bias_attr=fluid.ParamAttr(name=name + '_reverse_rnn_bias'))
-    input, length = fluid.layers.sequence_pad(input_proj_bn, pad_value)
     reverse_rnn, _ = fluid.layers.rnn(
         cell=reverse_cell,
         inputs=input,
@@ -174,7 +176,7 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
         act=None,
         param_attr=fluid.ParamAttr(name=name + '_forward_fc_weight'),
         bias_attr=False)
-    input_proj_backward = fluid.layers.fc(
+    input_proj_reverse = fluid.layers.fc(
         input=input,
         size=size,
         act=None,
@@ -189,8 +191,8 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
         bias_attr=fluid.ParamAttr(name=name + '_forward_batch_norm_bias'),
         moving_mean_name=name + '_forward_batch_norm_moving_mean',
         moving_variance_name=name + '_forward_batch_norm_moving_variance')
-    input_proj_bn_backward = fluid.layers.batch_norm(
-        input=input_proj_backward,
+    input_proj_bn_reverse = fluid.layers.batch_norm(
+        input=input_proj_reverse,
         act=None,
         param_attr=fluid.ParamAttr(name=name + '_reverse_batch_norm_weight'),
         bias_attr=fluid.ParamAttr(name=name + '_reverse_batch_norm_bias'),
         moving_mean_name=name + '_reverse_batch_norm_moving_mean',
         moving_variance_name=name + '_reverse_batch_norm_moving_variance')
     # forward and backward in time
-    forward_cell = RNNCell(
-        hidden_size=size,
-        activation=fluid.layers.brelu,
-        param_attr=fluid.ParamAttr(name=name + '_forward_rnn_weight'),
-        bias_attr=fluid.ParamAttr(name=name + '_forward_rnn_bias'))
-
-    pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32))
-    input, length = fluid.layers.sequence_pad(input_proj_bn, pad_value)
+    input, length = fluid.layers.sequence_pad(input_proj_bn_forward,
+                                              pad_value)
     forward_rnn, _ = fluid.layers.rnn(
         cell=forward_cell, inputs=input, time_major=False, is_reverse=False)
     forward_rnn = fluid.layers.sequence_unpad(x=forward_rnn, length=length)
 
-    reverse_cell = RNNCell(
-        hidden_size=size,
-        activation=fluid.layers.brelu,
-        param_attr=fluid.ParamAttr(name=name + '_reverse_rnn_weight'),
-        bias_attr=fluid.ParamAttr(name=name + '_reverse_rnn_bias'))
-    input, length = fluid.layers.sequence_pad(input_proj_bn, pad_value)
+    input, length = fluid.layers.sequence_pad(input_proj_bn_reverse,
+                                              pad_value)
     reverse_rnn, _ = fluid.layers.rnn(
         cell=reverse_cell,
         inputs=input,
@@ -248,7 +240,7 @@ def bidirectional_gru_bn_layer(name, input, size, act):
         act=None,
         param_attr=fluid.ParamAttr(name=name + '_forward_fc_weight'),
         bias_attr=False)
-    input_proj_backward = fluid.layers.fc(
+    input_proj_reverse = fluid.layers.fc(
         input=input,
         size=size * 3,
         act=None,
@@ -262,8 +254,8 @@ def bidirectional_gru_bn_layer(name, input, size, act):
         bias_attr=fluid.ParamAttr(name=name + '_forward_batch_norm_bias'),
         moving_mean_name=name + '_forward_batch_norm_moving_mean',
         moving_variance_name=name + '_forward_batch_norm_moving_variance')
-    input_proj_bn_backward = fluid.layers.batch_norm(
-        input=input_proj_backward,
+    input_proj_bn_reverse = fluid.layers.batch_norm(
+        input=input_proj_reverse,
         act=None,
         param_attr=fluid.ParamAttr(name=name + '_reverse_batch_norm_weight'),
         bias_attr=fluid.ParamAttr(name=name + '_reverse_batch_norm_bias'),
@@ -279,7 +271,7 @@ def bidirectional_gru_bn_layer(name, input, size, act):
         bias_attr=fluid.ParamAttr(name=name + '_forward_gru_bias'),
         is_reverse=False)
     reverse_gru = fluid.layers.dynamic_gru(
-        input=input_proj_bn_backward,
+        input=input_proj_bn_reverse,
         size=size,
         gate_activation='sigmoid',
         candidate_activation=act,
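
Patch 2 hoists the two cells and the pad value to the top of the function so
both branches share them, and renames "backward" to "reverse" throughout. The
part worth internalizing is the pad -> rnn -> unpad round trip that replaces
the old sequence_reverse trick. A sketch under the same PaddlePaddle 1.6+
assumption; run_reverse_rnn is an illustrative helper, not a function in this
repo:

import numpy as np
import paddle.fluid as fluid

def run_reverse_rnn(seq_input, cell):
    # seq_input is a variable-length (LoD) sequence batch.
    pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32))
    # Pad to a dense [batch, max_time, feature] tensor and keep true lengths.
    padded, length = fluid.layers.sequence_pad(seq_input, pad_value)
    # Passing sequence_length makes is_reverse=True flip each example over
    # its own valid steps, so padding never leaks into the outputs.
    outputs, _ = fluid.layers.rnn(
        cell=cell,
        inputs=padded,
        sequence_length=length,
        time_major=False,
        is_reverse=True)
    # Strip the padding again so downstream LoD-based ops keep working.
    return fluid.layers.sequence_unpad(x=outputs, length=length)
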
From 0d5ed1b45a643f3ce8e0c64a10f7613cf8dadf20 Mon Sep 17 00:00:00 2001
From: lfchener
Date: Tue, 29 Oct 2019 12:12:58 +0000
Subject: [PATCH 3/3] Change StaticRNN to fluid.layers.rnn

---
 model_utils/network.py | 60 ++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 34 deletions(-)

diff --git a/model_utils/network.py b/model_utils/network.py
index 0749dc5d..e27ff02c 100644
--- a/model_utils/network.py
+++ b/model_utils/network.py
@@ -60,6 +60,8 @@ def conv_bn_layer(input, filter_size, num_channels_in, num_channels_out, stride,
 
 
 class RNNCell(fluid.layers.RNNCell):
+    """A simple rnn cell."""
+
     def __init__(self,
                  hidden_size,
                  param_attr=None,
@@ -68,7 +70,8 @@ class RNNCell(fluid.layers.RNNCell):
                  activation=None,
                  dtype="float32",
                  name="RNNCell"):
-        '''A simple rnn cell.
+        """Initialize simple rnn cell.
+
         :param hidden_size: Dimension of RNN cells.
         :type hidden_size: int
         :param param_attr: Parameter properties of hidden layer weights that
@@ -82,7 +85,7 @@ class RNNCell(fluid.layers.RNNCell):
         :type activation: Activation
         :param name: Name of cell
         :type name: string
-        '''
+        """
 
         self.hidden_size = hidden_size
         self.param_attr = param_attr
@@ -111,6 +114,7 @@ class RNNCell(fluid.layers.RNNCell):
 def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
     """Bidirectional simple rnn layer with sequence-wise batch normalization.
     The batch normalization is only performed on input-state weights.
+
     :param name: Name of the layer parameters.
     :type name: string
     :param input: Input layer.
@@ -147,28 +151,14 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
         bias_attr=False)
 
     # batch norm is only performed on input-state projection
-    input_proj_bn = fluid.layers.batch_norm(
+    input_proj_bn_forward = fluid.layers.batch_norm(
         input=input_proj,
         act=None,
         param_attr=fluid.ParamAttr(name=name + '_batch_norm_weight'),
         bias_attr=fluid.ParamAttr(name=name + '_batch_norm_bias'),
         moving_mean_name=name + '_batch_norm_moving_mean',
         moving_variance_name=name + '_batch_norm_moving_variance')
-    #forward and backward in time
-
-    input, length = fluid.layers.sequence_pad(input_proj_bn, pad_value)
-    forward_rnn, _ = fluid.layers.rnn(
-        cell=forward_cell, inputs=input, time_major=False, is_reverse=False)
-    forward_rnn = fluid.layers.sequence_unpad(x=forward_rnn, length=length)
-
-    reverse_rnn, _ = fluid.layers.rnn(
-        cell=reverse_cell,
-        inputs=input,
-        sequence_length=length,
-        time_major=False,
-        is_reverse=True)
-    reverse_rnn = fluid.layers.sequence_unpad(x=reverse_rnn, length=length)
-
+    input_proj_bn_reverse = input_proj_bn_forward
     else:
         input_proj_forward = fluid.layers.fc(
@@ -199,22 +189,20 @@ def bidirectional_simple_rnn_bn_layer(name, input, size, share_weights):
         bias_attr=fluid.ParamAttr(name=name + '_reverse_batch_norm_bias'),
         moving_mean_name=name + '_reverse_batch_norm_moving_mean',
         moving_variance_name=name + '_reverse_batch_norm_moving_variance')
-    # forward and backward in time
-    input, length = fluid.layers.sequence_pad(input_proj_bn_forward,
-                                              pad_value)
-    forward_rnn, _ = fluid.layers.rnn(
-        cell=forward_cell, inputs=input, time_major=False, is_reverse=False)
-    forward_rnn = fluid.layers.sequence_unpad(x=forward_rnn, length=length)
-
-    input, length = fluid.layers.sequence_pad(input_proj_bn_reverse,
-                                              pad_value)
-    reverse_rnn, _ = fluid.layers.rnn(
-        cell=reverse_cell,
-        inputs=input,
-        sequence_length=length,
-        time_major=False,
-        is_reverse=True)
-    reverse_rnn = fluid.layers.sequence_unpad(x=reverse_rnn, length=length)
+    # forward and backward in time
+    input, length = fluid.layers.sequence_pad(input_proj_bn_forward, pad_value)
+    forward_rnn, _ = fluid.layers.rnn(
+        cell=forward_cell, inputs=input, time_major=False, is_reverse=False)
+    forward_rnn = fluid.layers.sequence_unpad(x=forward_rnn, length=length)
+
+    input, length = fluid.layers.sequence_pad(input_proj_bn_reverse, pad_value)
+    reverse_rnn, _ = fluid.layers.rnn(
+        cell=reverse_cell,
+        inputs=input,
+        sequence_length=length,
+        time_major=False,
+        is_reverse=True)
+    reverse_rnn = fluid.layers.sequence_unpad(x=reverse_rnn, length=length)
 
     out = fluid.layers.concat(input=[forward_rnn, reverse_rnn], axis=1)
     return out
@@ -223,6 +211,7 @@ def bidirectional_gru_bn_layer(name, input, size, act):
     """Bidirectional gru layer with sequence-wise batch normalization.
     The batch normalization is only performed on input-state weights.
+
     :param name: Name of the layer.
     :type name: string
     :param input: Input layer.
     :type input: Variable
@@ -283,6 +272,7 @@ def conv_group(input, num_stacks, seq_len_data, masks):
     """Convolution group with stacked convolution layers.
+
     :param input: Input layer.
     :type input: Variable
     :param num_stacks: Number of stacked convolution layers.
@@ -336,6 +326,7 @@ def rnn_group(input, size, num_stacks, num_conv_layers, use_gru, share_rnn_weights):
     """RNN group with stacked bidirectional simple RNN or GRU layers.
+ :param input: Input layer. :type input: Variable :param size: Dimension of RNN cells in each layer. @@ -380,6 +371,7 @@ def deep_speech_v2_network(audio_data, use_gru=False, share_rnn_weights=True): """The DeepSpeech2 network structure. + :param audio_data: Audio spectrogram data layer. :type audio_data: Variable :param text_data: Transcription text data layer.
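
After the third patch, both branches of bidirectional_simple_rnn_bn_layer feed
one shared code path: build two cells, pad once per direction, run
fluid.layers.rnn forward and reversed, unpad, and concatenate. The same end
state in miniature, still assuming PaddlePaddle 1.6+; bidirectional_rnn and its
arguments are illustrative names, not identifiers from this repo:

import numpy as np
import paddle.fluid as fluid

def bidirectional_rnn(proj_forward, proj_reverse, forward_cell, reverse_cell):
    pad_value = fluid.layers.assign(input=np.array([0.0], dtype=np.float32))

    # Forward direction: pad, run the cell left-to-right, unpad.
    padded, length = fluid.layers.sequence_pad(proj_forward, pad_value)
    forward_out, _ = fluid.layers.rnn(
        cell=forward_cell, inputs=padded, time_major=False, is_reverse=False)
    forward_out = fluid.layers.sequence_unpad(x=forward_out, length=length)

    # Reverse direction: sequence_length keeps the reversal length-aware.
    padded, length = fluid.layers.sequence_pad(proj_reverse, pad_value)
    reverse_out, _ = fluid.layers.rnn(
        cell=reverse_cell,
        inputs=padded,
        sequence_length=length,
        time_major=False,
        is_reverse=True)
    reverse_out = fluid.layers.sequence_unpad(x=reverse_out, length=length)

    # Concatenate along the feature axis, as the layer above does.
    return fluid.layers.concat(input=[forward_out, reverse_out], axis=1)

# With share_weights=True the two projections are simply the same tensor:
#   out = bidirectional_rnn(proj_bn, proj_bn, forward_cell, reverse_cell)
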