From 91170bd2604e5a22237fcb46ebcf44f4d86914b5 Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Tue, 11 Jun 2024 11:12:58 +0800 Subject: [PATCH] adapt view behavior change, fix KeyError. (#3794) * adapt view behavior change, fix KeyError. * fix readme demo run error. * fixed opencc version --- paddlespeech/cli/asr/infer.py | 2 +- paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py index 4001f957..231a00f4 100644 --- a/paddlespeech/cli/asr/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -274,7 +274,7 @@ class ASRExecutor(BaseExecutor): # fbank audio = preprocessing(audio, **preprocess_args) - audio_len = paddle.to_tensor([audio.shape[0]]).unsqueeze(axis=0) + audio_len = paddle.to_tensor(audio.shape[0]).unsqueeze(axis=0) audio = paddle.to_tensor(audio, dtype='float32').unsqueeze(axis=0) self._inputs["audio"] = audio diff --git a/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py b/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py index a3744d34..64195def 100755 --- a/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py +++ b/paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py @@ -188,7 +188,7 @@ class Wav2vec2ASR(nn.Layer): x_lens = x.shape[1] ctc_probs = self.ctc.log_softmax(x) # (B, maxlen, vocab_size) topk_prob, topk_index = ctc_probs.topk(1, axis=2) # (B, maxlen, 1) - topk_index = topk_index.view([batch_size, x_lens]) # (B, maxlen) + topk_index = topk_index.reshape([batch_size, x_lens]) # (B, maxlen) hyps = [hyp.tolist() for hyp in topk_index] hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps] diff --git a/setup.py b/setup.py index 10a6502c..030f7f88 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ base = [ "matplotlib", "nara_wpe", "onnxruntime>=1.11.0", - "opencc", + "opencc==1.1.6", "opencc-python-reimplemented", "pandas", "paddleaudio>=1.1.0",