From 3df69e750216cde61fcff5dd6cbcba8eca899b11 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Wed, 8 Mar 2023 02:51:38 +0000 Subject: [PATCH] update inference step --- paddlespeech/t2s/models/diffsinger/diffsinger.py | 2 +- paddlespeech/t2s/modules/diffusion.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/paddlespeech/t2s/models/diffsinger/diffsinger.py b/paddlespeech/t2s/models/diffsinger/diffsinger.py index 1fa4dfd39..b86d835bc 100644 --- a/paddlespeech/t2s/models/diffsinger/diffsinger.py +++ b/paddlespeech/t2s/models/diffsinger/diffsinger.py @@ -348,7 +348,7 @@ class DiffSingerInference(nn.Layer): note_dur=note_dur, is_slur=is_slur, get_mel_fs2=get_mel_fs2) - logmel = self.normalizer.inverse(normalized_mel) + logmel = normalized_mel return logmel diff --git a/paddlespeech/t2s/modules/diffusion.py b/paddlespeech/t2s/modules/diffusion.py index 621dfe530..4ada57489 100644 --- a/paddlespeech/t2s/modules/diffusion.py +++ b/paddlespeech/t2s/modules/diffusion.py @@ -223,7 +223,7 @@ class GaussianDiffusion(nn.Layer): num_inference_steps: Optional[int]=1000, strength: Optional[float]=None, scheduler_type: Optional[str]="ddpm", - clip_noise: Optional[bool]=True, + clip_noise: Optional[bool]=False, clip_noise_range: Optional[Tuple[float, float]]=(-1, 1), callback: Optional[Callable[[int, int, int, paddle.Tensor], None]]=None, @@ -302,6 +302,9 @@ class GaussianDiffusion(nn.Layer): noisy_input = scheduler.add_noise(ref_x, noise, timesteps[0]) denoised_output = noisy_input + if clip_noise: + n_min, n_max = clip_noise_range + denoised_output = paddle.clip(denoised_output, n_min, n_max) for i, t in enumerate(timesteps): denoised_output = scheduler.scale_model_input(denoised_output, t) noise_pred = self.denoiser(denoised_output, t, cond)