Add more comments.

pull/2/head
yangyaming 7 years ago
parent b3ebf3fd62
commit f38d948193

@ -320,6 +320,9 @@ class DataGenerator(object):
if flatten: if flatten:
padded_audio = padded_audio.flatten() padded_audio = padded_audio.flatten()
# Stride size for conv0 is (3, 2)
# Stride size for conv1 to convN is (1, 2)
# These strides are the same as in the network and are hard-coded here
padded_instance = [padded_audio, text] padded_instance = [padded_audio, text]
padded_conv0_h = (padded_audio.shape[0] - 1) // 2 + 1 padded_conv0_h = (padded_audio.shape[0] - 1) // 2 + 1
padded_conv0_w = (padded_audio.shape[1] - 1) // 3 + 1 padded_conv0_w = (padded_audio.shape[1] - 1) // 3 + 1
@ -327,6 +330,8 @@ class DataGenerator(object):
padded_instance += [ padded_instance += [
[0], # sequence offset, always 0 [0], # sequence offset, always 0
[valid_w], # valid sequence length [valid_w], # valid sequence length
# Index ranges for channel, height and width
# Please refer to the scale_sub_region layer for details
[1, 32, 1, padded_conv0_h, valid_w + 1, padded_conv0_w] [1, 32, 1, padded_conv0_h, valid_w + 1, padded_conv0_w]
] ]
pre_padded_h = padded_conv0_h pre_padded_h = padded_conv0_h

@ -270,7 +270,7 @@ def deep_speech_v2_network(audio_data,
block_x=1, block_x=1,
block_y=conv_group_height) block_y=conv_group_height)
# remove padding part # remove padding part
remove_padding = paddle.layer.sub_seq( remove_padding_data = paddle.layer.sub_seq(
input=conv2seq, input=conv2seq,
offsets=seq_offset_data, offsets=seq_offset_data,
sizes=seq_len_data, sizes=seq_len_data,
@ -278,7 +278,7 @@ def deep_speech_v2_network(audio_data,
bias_attr=False) bias_attr=False)
# rnn group # rnn group
rnn_group_output = rnn_group( rnn_group_output = rnn_group(
input=remove_padding, input=remove_padding_data,
size=rnn_size, size=rnn_size,
num_stacks=num_rnn_layers, num_stacks=num_rnn_layers,
use_gru=use_gru, use_gru=use_gru,

Loading…
Cancel
Save