diff --git a/deepspeech/models/ds2_online/deepspeech2.py b/deepspeech/models/ds2_online/deepspeech2.py index 97842e8e..8a3d7210 100644 --- a/deepspeech/models/ds2_online/deepspeech2.py +++ b/deepspeech/models/ds2_online/deepspeech2.py @@ -295,7 +295,7 @@ class DeepSpeech2ModelOnline(nn.Layer): probs.numpy(), eouts_len, vocab_list, decoding_method, lang_model_path, beam_alpha, beam_beta, beam_size, cutoff_prob, cutoff_top_n, num_processes) - + """ @paddle.no_grad() def decode_by_chunk(self, eouts_prefix, eouts_len_prefix, chunk_state_list, audio_chunk, audio_len_chunk, vocab_list, @@ -349,14 +349,14 @@ class DeepSpeech2ModelOnline(nn.Layer): probs.numpy(), eouts_len, vocab_list, decoding_method, lang_model_path, beam_alpha, beam_beta, beam_size, cutoff_prob, cutoff_top_n, num_processes) - + """ """ decocd_prob, decode_prob_chunk_by_chunk decode_prob_by_chunk is only used for test """ - + """ @paddle.no_grad() def decode_prob(self, audio, audio_len): eouts, eouts_len, final_state_list = self.encoder(audio, audio_len) @@ -385,6 +385,7 @@ class DeepSpeech2ModelOnline(nn.Layer): eouts_lens = eouts_chunk_lens probs = self.decoder.softmax(eouts) return probs, eouts, eouts_lens, final_state_list + """ @classmethod def from_pretrained(cls, dataloader, config, checkpoint_path): diff --git a/tests/deepspeech2_online_model_test.py b/tests/deepspeech2_online_model_test.py index 7f06b9d2..307d6495 100644 --- a/tests/deepspeech2_online_model_test.py +++ b/tests/deepspeech2_online_model_test.py @@ -105,6 +105,51 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase): loss = model(self.audio, self.audio_len, self.text, self.text_len) self.assertEqual(loss.numel(), 1) + def test_ds2_6(self): + model = DeepSpeech2ModelOnline( + feat_size=self.feat_dim, + dict_size=10, + num_conv_layers=2, + num_rnn_layers=1, + rnn_size=1024, + num_fc_layers=2, + fc_layers_size_list=[512, 256], + use_gru=True) + model.eval() + paddle.device.set_device("cpu") + de_ch_size = 9 + + eouts, eouts_lens, final_state_list = model.encoder( + self.audio, self.audio_len) + eouts_by_chk_list, eouts_lens_by_chk_list, final_state_list_by_chk = model.encoder.forward_chunk_by_chunk( + self.audio, self.audio_len, de_ch_size) + eouts_by_chk = paddle.concat(eouts_by_chk_list, axis = 1) + eouts_lens_by_chk = paddle.add_n(eouts_lens_by_chk_list) + decode_max_len = eouts.shape[1] + print ("dml", decode_max_len) + eouts_by_chk = eouts_by_chk[:, :decode_max_len, :] + self.assertEqual( + paddle.sum( + paddle.abs(paddle.subtract(eouts_lens, eouts_lens_by_chk))), 0) + self.assertEqual( + paddle.sum(paddle.abs(paddle.subtract(eouts, eouts_by_chk))), 0) + self.assertEqual(paddle.allclose(eouts_by_chk, eouts), True) + """ + print ("conv_x", conv_x) + print ("conv_x_by_chk", conv_x_by_chk) + print ("final_state_list", final_state_list) + #print ("final_state_list_by_chk", final_state_list_by_chk) + print (paddle.sum(paddle.abs(paddle.subtract(eouts[:,:de_ch_size,:], eouts_by_chk[:,:de_ch_size,:])))) + print (paddle.allclose(eouts[:,:de_ch_size,:], eouts_by_chk[:,:de_ch_size,:])) + print (paddle.sum(paddle.abs(paddle.subtract(eouts[:,de_ch_size:de_ch_size*2,:], eouts_by_chk[:,de_ch_size:de_ch_size*2,:])))) + print (paddle.allclose(eouts[:,de_ch_size:de_ch_size*2,:], eouts_by_chk[:,de_ch_size:de_ch_size*2,:])) + print (paddle.sum(paddle.abs(paddle.subtract(eouts[:,de_ch_size*2:de_ch_size*3,:], eouts_by_chk[:,de_ch_size*2:de_ch_size*3,:])))) + print (paddle.allclose(eouts[:,de_ch_size*2:de_ch_size*3,:], eouts_by_chk[:,de_ch_size*2:de_ch_size*3,:])) + print (paddle.sum(paddle.abs(paddle.subtract(eouts, eouts_by_chk)))) + print (paddle.sum(paddle.abs(paddle.subtract(eouts, eouts_by_chk)))) + print (paddle.allclose(eouts[:,:,:], eouts_by_chk[:,:,:])) + """ + """ def split_into_chunk(self, x, x_lens, decoder_chunk_size, subsampling_rate, receptive_field_length): chunk_size = (decoder_chunk_size - 1 @@ -134,7 +179,7 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase): return x_chunk_list, x_chunk_lens_list - def test_ds2_6(self): + def test_ds2_7(self): model = DeepSpeech2ModelOnline( feat_size=self.feat_dim, dict_size=10, @@ -157,7 +202,7 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase): chunk_state_list = [None] * model.encoder.num_rnn_layers for i, audio_chunk in enumerate(audio_chunk_list): audio_chunk_lens = audio_chunk_lens_list[i] - probs_pre_chunks, eouts_prefix, eouts_lens_prefix, chunk_state_list = model.decode_prob_by_chunk( + eouts_prefix, eouts_lens_prefix, chunk_state_list = model.decode_prob_by_chunk( audio_chunk, audio_chunk_lens, eouts_prefix, eouts_lens_prefix, chunk_state_list) # print (i, probs_pre_chunks.shape) @@ -168,53 +213,7 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase): decode_max_len = probs.shape[1] probs_pre_chunks = probs_pre_chunks[:, :decode_max_len, :] self.assertEqual(paddle.allclose(probs, probs_pre_chunks), True) - - def test_ds2_7(self): - model = DeepSpeech2ModelOnline( - feat_size=self.feat_dim, - dict_size=10, - num_conv_layers=2, - num_rnn_layers=1, - rnn_size=1024, - num_fc_layers=2, - fc_layers_size_list=[512, 256], - use_gru=True) - model.eval() - paddle.device.set_device("cpu") - de_ch_size = 9 - - probs, eouts, eouts_lens, final_state_list = model.decode_prob( - self.audio, self.audio_len) - probs_by_chk, eouts_by_chk, eouts_lens_by_chk, final_state_list_by_chk = model.decode_prob_chunk_by_chunk( - self.audio, self.audio_len, de_ch_size) - decode_max_len = probs.shape[1] - probs_by_chk = probs_by_chk[:, :decode_max_len, :] - eouts_by_chk = eouts_by_chk[:, :decode_max_len, :] - self.assertEqual( - paddle.sum( - paddle.abs(paddle.subtract(eouts_lens, eouts_lens_by_chk))), 0) - self.assertEqual( - paddle.sum(paddle.abs(paddle.subtract(eouts, eouts_by_chk))), 0) - self.assertEqual( - paddle.sum( - paddle.abs(paddle.subtract(probs, probs_by_chk))).numpy(), 0) - self.assertEqual(paddle.allclose(eouts_by_chk, eouts), True) - self.assertEqual(paddle.allclose(probs_by_chk, probs), True) - """ - print ("conv_x", conv_x) - print ("conv_x_by_chk", conv_x_by_chk) - print ("final_state_list", final_state_list) - #print ("final_state_list_by_chk", final_state_list_by_chk) - print (paddle.sum(paddle.abs(paddle.subtract(eouts[:,:de_ch_size,:], eouts_by_chk[:,:de_ch_size,:])))) - print (paddle.allclose(eouts[:,:de_ch_size,:], eouts_by_chk[:,:de_ch_size,:])) - print (paddle.sum(paddle.abs(paddle.subtract(eouts[:,de_ch_size:de_ch_size*2,:], eouts_by_chk[:,de_ch_size:de_ch_size*2,:])))) - print (paddle.allclose(eouts[:,de_ch_size:de_ch_size*2,:], eouts_by_chk[:,de_ch_size:de_ch_size*2,:])) - print (paddle.sum(paddle.abs(paddle.subtract(eouts[:,de_ch_size*2:de_ch_size*3,:], eouts_by_chk[:,de_ch_size*2:de_ch_size*3,:])))) - print (paddle.allclose(eouts[:,de_ch_size*2:de_ch_size*3,:], eouts_by_chk[:,de_ch_size*2:de_ch_size*3,:])) - print (paddle.sum(paddle.abs(paddle.subtract(eouts, eouts_by_chk)))) - print (paddle.sum(paddle.abs(paddle.subtract(eouts, eouts_by_chk)))) - print (paddle.allclose(eouts[:,:,:], eouts_by_chk[:,:,:])) - """ + """ if __name__ == '__main__':