diff --git a/paddlespeech/s2t/io/dataloader.py b/paddlespeech/s2t/io/dataloader.py
index db6292f2c..5065c31ed 100644
--- a/paddlespeech/s2t/io/dataloader.py
+++ b/paddlespeech/s2t/io/dataloader.py
@@ -404,6 +404,12 @@ class DataLoaderFactory():
             config['subsampling_factor'] = 1
             config['num_encs'] = 1
             config['shortest_first'] = False
+            config['minibatches'] = 0
+            config['batch_count'] = 'auto'
+            config['batch_bins'] = 0
+            config['batch_frames_in'] = 0
+            config['batch_frames_out'] = 0
+            config['batch_frames_inout'] = 0
         elif mode == 'valid':
             config['manifest'] = config.dev_manifest
             config['train_mode'] = False
diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py
index 339af4b74..3fe1d352f 100644
--- a/paddlespeech/s2t/models/u2_st/u2_st.py
+++ b/paddlespeech/s2t/models/u2_st/u2_st.py
@@ -170,8 +170,8 @@ class U2STBaseModel(nn.Layer):
         ys_in_lens = ys_pad_lens + 1

         # 1. Forward decoder
-        decoder_out, _ = self.st_decoder(encoder_out, encoder_mask, ys_in_pad,
-                                         ys_in_lens)
+        decoder_out, *_ = self.st_decoder(encoder_out, encoder_mask, ys_in_pad,
+                                          ys_in_lens)

         # 2. Compute attention loss
         loss_att = self.criterion_att(decoder_out, ys_out_pad)
@@ -203,8 +203,8 @@ class U2STBaseModel(nn.Layer):
         ys_in_lens = ys_pad_lens + 1

         # 1. Forward decoder
-        decoder_out, _ = self.decoder(encoder_out, encoder_mask, ys_in_pad,
-                                      ys_in_lens)
+        decoder_out, *_ = self.decoder(encoder_out, encoder_mask, ys_in_pad,
+                                       ys_in_lens)

         # 2. Compute attention loss
         loss_att = self.criterion_att(decoder_out, ys_out_pad)
diff --git a/paddlespeech/s2t/modules/decoder.py b/paddlespeech/s2t/modules/decoder.py
index 4ddf057b6..1881a865c 100644
--- a/paddlespeech/s2t/modules/decoder.py
+++ b/paddlespeech/s2t/modules/decoder.py
@@ -110,14 +110,14 @@ class TransformerDecoder(BatchScorerInterface, nn.Layer):
                 concat_after=concat_after, ) for _ in range(num_blocks)
         ])

-    def forward(
-            self,
-            memory: paddle.Tensor,
-            memory_mask: paddle.Tensor,
-            ys_in_pad: paddle.Tensor,
-            ys_in_lens: paddle.Tensor,
-            r_ys_in_pad: paddle.Tensor=paddle.empty([0]),
-            reverse_weight: float=0.0) -> Tuple[paddle.Tensor, paddle.Tensor]:
+    def forward(self,
+                memory: paddle.Tensor,
+                memory_mask: paddle.Tensor,
+                ys_in_pad: paddle.Tensor,
+                ys_in_lens: paddle.Tensor,
+                r_ys_in_pad: paddle.Tensor=paddle.empty([0]),
+                reverse_weight: float=0.0
+                ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
         """Forward decoder.
         Args:
             memory: encoded memory, float32 (batch, maxlen_in, feat)
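
Note: the starred unpacking (decoder_out, *_ = ...) in u2_st.py is what lets these call sites tolerate the widened return annotation of TransformerDecoder.forward, which now declares three tensors instead of two. A minimal sketch of the idea, using hypothetical stand-in functions (old_decoder, new_decoder) rather than the real Paddle decoder:

    from typing import Tuple

    def old_decoder(x: float) -> Tuple[float, float]:
        # Stand-in for the old interface: two return values.
        return x * 2, 1.0

    def new_decoder(x: float) -> Tuple[float, float, float]:
        # Stand-in for the new interface: an extra trailing value
        # besides the primary decoder output.
        return x * 2, x * -2, 1.0

    # "decoder_out, _ = new_decoder(3.0)" would raise "too many values
    # to unpack"; the starred form absorbs any number of trailing values,
    # so the same call site works against either return shape.
    for fn in (old_decoder, new_decoder):
        decoder_out, *_ = fn(3.0)
        print(fn.__name__, decoder_out)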