From 5e8c727fd6785e900feaa455e21b1ab93f7dc0b6 Mon Sep 17 00:00:00 2001 From: megemini Date: Fri, 29 Nov 2024 19:29:46 +0800 Subject: [PATCH] =?UTF-8?q?[Hackathon=207th]=20=E4=BF=AE=E5=A4=8D=20`tal?= =?UTF-8?q?=5Fcs`=20=E6=B5=8B=E8=AF=95=E4=B8=AD=200D=20tensor=20to=201D=20?= =?UTF-8?q?(#3913)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [Fix] 0D tensor to 1D * [Update] feat dim --- paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py | 2 +- paddlespeech/s2t/exps/u2/bin/quant.py | 2 +- paddlespeech/s2t/exps/u2/bin/test_wav.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py b/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py index d087405d..0b763684 100644 --- a/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py +++ b/paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py @@ -75,7 +75,7 @@ class DeepSpeech2Tester_hub(): feat = self.preprocessing(audio, **self.preprocess_args) logger.info(f"feat shape: {feat.shape}") - audio_len = paddle.to_tensor(feat.shape[0]) + audio_len = paddle.to_tensor(feat.shape[0]).unsqueeze(0) audio = paddle.to_tensor(feat, dtype='float32').unsqueeze(axis=0) result_transcripts = self.compute_result_transcripts( diff --git a/paddlespeech/s2t/exps/u2/bin/quant.py b/paddlespeech/s2t/exps/u2/bin/quant.py index 73a9794f..72c64e46 100755 --- a/paddlespeech/s2t/exps/u2/bin/quant.py +++ b/paddlespeech/s2t/exps/u2/bin/quant.py @@ -75,7 +75,7 @@ class U2Infer(): feat = self.preprocessing(audio, **self.preprocess_args) logger.info(f"feat shape: {feat.shape}") - ilen = paddle.to_tensor(feat.shape[0]) + ilen = paddle.to_tensor(feat.shape[0]).unsqueeze(0) xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(0) decode_config = self.config.decode logger.info(f"decode cfg: {decode_config}") diff --git a/paddlespeech/s2t/exps/u2/bin/test_wav.py b/paddlespeech/s2t/exps/u2/bin/test_wav.py index a6228a12..0d1a3b3c 100644 --- a/paddlespeech/s2t/exps/u2/bin/test_wav.py +++ b/paddlespeech/s2t/exps/u2/bin/test_wav.py @@ -78,7 +78,7 @@ class U2Infer(): if self.args.debug: np.savetxt("feat.transform.txt", feat) - ilen = paddle.to_tensor(feat.shape[0]) + ilen = paddle.to_tensor(feat.shape[0]).unsqueeze(0) xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(0) decode_config = self.config.decode logger.info(f"decode cfg: {decode_config}")