From 37f60d6c2a28d19bf9d52c210e3dc3287dca18d0 Mon Sep 17 00:00:00 2001 From: suzakuwcx Date: Sun, 29 Dec 2024 00:09:09 +0800 Subject: [PATCH] t2s/modules/losses.py: Add a 'clamp_eps' parameter to dynamically adjust the clipping threshold --- paddlespeech/t2s/modules/losses.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/paddlespeech/t2s/modules/losses.py b/paddlespeech/t2s/modules/losses.py index 029ad1be1..61dfbaf03 100644 --- a/paddlespeech/t2s/modules/losses.py +++ b/paddlespeech/t2s/modules/losses.py @@ -461,7 +461,8 @@ def stft(x, win_length=None, window='hann', center=True, - pad_mode='reflect'): + pad_mode='reflect', + clamp_eps=1e-7): """Perform STFT and convert to magnitude spectrogram. Args: x(Tensor): @@ -501,7 +502,7 @@ def stft(x, real = x_stft.real() imag = x_stft.imag() - return paddle.sqrt(paddle.clip(real**2 + imag**2, min=1e-7)).transpose( + return paddle.sqrt(paddle.clip(real**2 + imag**2, min=clamp_eps)).transpose( [0, 2, 1]) @@ -1501,13 +1502,15 @@ class MultiScaleSTFTLoss(nn.Layer): fft_size=s.window_length, hop_length=s.hop_length, win_length=s.window_length, - window=s.window_type) + window=s.window_type, + clamp_eps=1e-5) y_mag = stft( y.reshape([-1, y.shape[-1]]), fft_size=s.window_length, hop_length=s.hop_length, win_length=s.window_length, - window=s.window_type) + window=s.window_type, + clamp_eps=1e-5) x_mag = x_mag.transpose([0, 2, 1]) y_mag = y_mag.transpose([0, 2, 1]) elif isinstance(x, AudioSignal) and isinstance(y, AudioSignal):