From db24e9d46ea3fc1c20290ed317afd3a9d3034626 Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Tue, 2 Sep 2025 20:50:12 +0800 Subject: [PATCH] Reduce precision difference for whisper model --- paddlespeech/s2t/models/whisper/whisper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddlespeech/s2t/models/whisper/whisper.py b/paddlespeech/s2t/models/whisper/whisper.py index 54aef0956..9ae2a7de5 100644 --- a/paddlespeech/s2t/models/whisper/whisper.py +++ b/paddlespeech/s2t/models/whisper/whisper.py @@ -1613,7 +1613,9 @@ def log_mel_spectrogram(audio: Union[str, np.ndarray, paddle.Tensor], magnitudes = stft[:, :-1].abs()**2 filters = mel_filters(resource_path, n_mels) - mel_spec = filters @ magnitudes + mel_spec = paddle.to_tensor( + filters.numpy() + @ magnitudes.numpy()) # Use numpy to reduce precision difference mel_spec = paddle.to_tensor(mel_spec.numpy().tolist()) log_spec = paddle.clip(mel_spec, min=1e-10).log10()