diff --git a/deepspeech/models/deepspeech2.py b/deepspeech/models/deepspeech2.py index 01edbbae6..cdf32cf37 100644 --- a/deepspeech/models/deepspeech2.py +++ b/deepspeech/models/deepspeech2.py @@ -172,15 +172,14 @@ class DeepSpeech2Model(nn.Layer): """Compute Model loss Args: - audio (Tenosr): [B, D, T] - text (Tensor): [B, T] + audio (Tensor): [B, T, D] + text (Tensor): [B, U] audio_len (Tensor): [B] text_len (Tensor): [B] Returns: loss (Tenosr): [1] """ - eouts, eouts_len = self.encoder(audio, audio_len) loss = self.decoder(eouts, eouts_len, text, text_len) return loss @@ -259,7 +258,7 @@ class DeepSpeech2InferModel(DeepSpeech2Model): """export model function Args: - audio (Tensor): [B, D, T] + audio (Tensor): [B, T, D] audio_len (Tensor): [B] Returns: