diff --git a/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py b/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
index d087405d..0b763684 100644
--- a/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
+++ b/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
@@ -75,7 +75,7 @@ class DeepSpeech2Tester_hub():
         feat = self.preprocessing(audio, **self.preprocess_args)
         logger.info(f"feat shape: {feat.shape}")
 
-        audio_len = paddle.to_tensor(feat.shape[0])
+        audio_len = paddle.to_tensor(feat.shape[0]).unsqueeze(0)
         audio = paddle.to_tensor(feat, dtype='float32').unsqueeze(axis=0)
 
         result_transcripts = self.compute_result_transcripts(
diff --git a/paddlespeech/s2t/exps/u2/bin/quant.py b/paddlespeech/s2t/exps/u2/bin/quant.py
index 73a9794f..72c64e46 100755
--- a/paddlespeech/s2t/exps/u2/bin/quant.py
+++ b/paddlespeech/s2t/exps/u2/bin/quant.py
@@ -75,7 +75,7 @@ class U2Infer():
         feat = self.preprocessing(audio, **self.preprocess_args)
         logger.info(f"feat shape: {feat.shape}")
 
-        ilen = paddle.to_tensor(feat.shape[0])
+        ilen = paddle.to_tensor(feat.shape[0]).unsqueeze(0)
         xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(0)
         decode_config = self.config.decode
         logger.info(f"decode cfg: {decode_config}")
diff --git a/paddlespeech/s2t/exps/u2/bin/test_wav.py b/paddlespeech/s2t/exps/u2/bin/test_wav.py
index a6228a12..0d1a3b3c 100644
--- a/paddlespeech/s2t/exps/u2/bin/test_wav.py
+++ b/paddlespeech/s2t/exps/u2/bin/test_wav.py
@@ -78,7 +78,7 @@ class U2Infer():
         if self.args.debug:
             np.savetxt("feat.transform.txt", feat)
 
-        ilen = paddle.to_tensor(feat.shape[0])
+        ilen = paddle.to_tensor(feat.shape[0]).unsqueeze(0)
         xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(0)
         decode_config = self.config.decode
         logger.info(f"decode cfg: {decode_config}")