fix asr infer.py

pull/3214/head
zxcd 2 years ago
parent 8205343c65
commit 6b48a20687

@ -25,9 +25,6 @@ import librosa
import numpy as np
import paddle
import soundfile
from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.utils.utility import UpdateConfig
from yacs.config import CfgNode
from ...utils.env import MODEL_HOME
@ -37,6 +34,9 @@ from ..log import logger
from ..utils import CLI_TIMER
from ..utils import stats_wrapper
from ..utils import timer_register
from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.utils.utility import UpdateConfig
__all__ = ['ASRExecutor']
@ -274,7 +274,7 @@ class ASRExecutor(BaseExecutor):
# fbank
audio = preprocessing(audio, **preprocess_args)
audio_len = paddle.to_tensor(audio.shape[0])
audio_len = paddle.to_tensor(audio.shape[0]).unsqueeze(axis=0)
audio = paddle.to_tensor(audio, dtype='float32').unsqueeze(axis=0)
self._inputs["audio"] = audio

@ -245,7 +245,7 @@ class SSLExecutor(BaseExecutor):
# fbank
audio = preprocessing(audio, **preprocess_args)
audio_len = paddle.to_tensor(audio.shape[0])
audio_len = paddle.to_tensor(audio.shape[0]).unsqueeze(axis=0)
audio = paddle.to_tensor(audio, dtype='float32').unsqueeze(axis=0)
self._inputs["audio"] = audio

@ -253,7 +253,7 @@ class WhisperExecutor(BaseExecutor):
# fbank
audio = log_mel_spectrogram(audio, resource_path=self.resource_path)
audio_len = paddle.to_tensor(audio.shape[0])
audio_len = paddle.to_tensor(audio.shape[0]).unsqueeze(axis=0)
self._inputs["audio"] = audio
self._inputs["audio_len"] = audio_len

Loading…
Cancel
Save