From af2bdf120ff6758ed13ec51cf1f3c2a2a373a3d4 Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Wed, 3 Sep 2025 16:52:16 +0800 Subject: [PATCH] Reduce precision difference for Whisper model (#4116) --- paddlespeech/s2t/models/whisper/whisper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddlespeech/s2t/models/whisper/whisper.py b/paddlespeech/s2t/models/whisper/whisper.py index 54aef0956..9ae2a7de5 100644 --- a/paddlespeech/s2t/models/whisper/whisper.py +++ b/paddlespeech/s2t/models/whisper/whisper.py @@ -1613,7 +1613,9 @@ def log_mel_spectrogram(audio: Union[str, np.ndarray, paddle.Tensor], magnitudes = stft[:, :-1].abs()**2 filters = mel_filters(resource_path, n_mels) - mel_spec = filters @ magnitudes + mel_spec = paddle.to_tensor( + filters.numpy() + @ magnitudes.numpy()) # Use numpy to reduce precision difference mel_spec = paddle.to_tensor(mel_spec.numpy().tolist()) log_spec = paddle.clip(mel_spec, min=1e-10).log10()