From 2451a177b0875f992119b4dc2377b422914d5fc9 Mon Sep 17 00:00:00 2001
From: huangyuxin
Date: Fri, 27 Aug 2021 08:59:39 +0000
Subject: [PATCH] fix padding len bug

---
 deepspeech/exps/deepspeech2/model.py        | 10 ++++++----
 deepspeech/models/ds2_online/deepspeech2.py |  4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index 0e0e83c0..f3e3fcad 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -455,10 +455,12 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
         x_batch = audio.numpy()
         batch_size, Tmax, x_dim = x_batch.shape
         x_len_batch = audio_len.numpy().astype(np.int64)
-
-        padding_len_batch = chunk_stride - (
-            Tmax - chunk_size
-        ) % chunk_stride  # The length of padding for the batch
+        if (Tmax - chunk_size) % chunk_stride != 0:
+            padding_len_batch = chunk_stride - (
+                Tmax - chunk_size
+            ) % chunk_stride  # The length of padding for the batch
+        else:
+            padding_len_batch = 0
 
         x_list = np.split(x_batch, batch_size, axis=0)
         x_len_list = np.split(x_len_batch, batch_size, axis=0)
diff --git a/deepspeech/models/ds2_online/deepspeech2.py b/deepspeech/models/ds2_online/deepspeech2.py
index d092b154..d0fbdcf6 100644
--- a/deepspeech/models/ds2_online/deepspeech2.py
+++ b/deepspeech/models/ds2_online/deepspeech2.py
@@ -100,12 +100,12 @@ class CRNNEncoder(nn.Layer):
         """Compute Encoder outputs
 
         Args:
-            x (Tensor): [B, feature_size, D]
+            x (Tensor): [B, T, D]
             x_lens (Tensor): [B]
             init_state_h_box(Tensor): init_states h for RNN layers: [num_rnn_layers * num_directions, batch_size, hidden_size]
             init_state_c_box(Tensor): init_states c for RNN layers: [num_rnn_layers * num_directions, batch_size, hidden_size]
         Return:
-            x (Tensor): encoder outputs, [B, size, D]
+            x (Tensor): encoder outputs, [B, T, D]
             x_lens (Tensor): encoder length, [B]
             final_state_h_box(Tensor): final_states h for RNN layers: [num_rnn_layers * num_directions, batch_size, hidden_size]
             final_state_c_box(Tensor): final_states c for RNN layers: [num_rnn_layers * num_directions, batch_size, hidden_size]
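
A minimal standalone sketch of the computation this patch guards (a hypothetical helper, not code from the repo; the chunk_size/chunk_stride values below are illustrative): the old unconditional formula pads a full chunk_stride of frames even when (Tmax - chunk_size) already divides evenly into strides, while the guarded version pads only when there is a remainder.

    def padding_len(Tmax, chunk_size, chunk_stride):
        # Pad only when the frames after the first chunk do not split
        # evenly into strides; otherwise no padding is needed.
        if (Tmax - chunk_size) % chunk_stride != 0:
            return chunk_stride - (Tmax - chunk_size) % chunk_stride
        return 0

    # Illustrative values, not taken from the repo's config:
    chunk_size, chunk_stride = 32, 8
    print(padding_len(48, chunk_size, chunk_stride))  # 0 (old formula wrongly gave 8)
    print(padding_len(50, chunk_size, chunk_stride))  # 6 (old formula also gave 6)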