diff --git a/paddlespeech/s2t/models/whisper/whisper.py b/paddlespeech/s2t/models/whisper/whisper.py index 54aef0956..9ae2a7de5 100644 --- a/paddlespeech/s2t/models/whisper/whisper.py +++ b/paddlespeech/s2t/models/whisper/whisper.py @@ -1613,7 +1613,9 @@ def log_mel_spectrogram(audio: Union[str, np.ndarray, paddle.Tensor], magnitudes = stft[:, :-1].abs()**2 filters = mel_filters(resource_path, n_mels) - mel_spec = filters @ magnitudes + mel_spec = paddle.to_tensor( + filters.numpy() + @ magnitudes.numpy()) # Use numpy to reduce precision difference mel_spec = paddle.to_tensor(mel_spec.numpy().tolist()) log_spec = paddle.clip(mel_spec, min=1e-10).log10()