diff --git a/paddlespeech/t2s/modules/diffusion.py b/paddlespeech/t2s/modules/diffusion.py index 52fe84ceb..eb67ffb0d 100644 --- a/paddlespeech/t2s/modules/diffusion.py +++ b/paddlespeech/t2s/modules/diffusion.py @@ -40,7 +40,7 @@ class WaveNetDenoiser(nn.Layer): layers (int, optional): Number of residual blocks inside, by default 20 stacks (int, optional): - The number of groups to split the residual blocks into, by default 4 + The number of groups to split the residual blocks into, by default 5 Within each group, the dilation of the residual block grows exponentially. residual_channels (int, optional): Residual channel of the residual blocks, by default 256 @@ -64,7 +64,7 @@ class WaveNetDenoiser(nn.Layer): out_channels: int=80, kernel_size: int=3, layers: int=20, - stacks: int=4, + stacks: int=5, residual_channels: int=256, gate_channels: int=512, skip_channels: int=256, @@ -72,7 +72,7 @@ class WaveNetDenoiser(nn.Layer): dropout: float=0., bias: bool=True, use_weight_norm: bool=False, - init_type: str="kaiming_uniform", ): + init_type: str="kaiming_normal", ): super().__init__() # initialize parameters @@ -118,18 +118,15 @@ class WaveNetDenoiser(nn.Layer): bias=bias) self.conv_layers.append(conv) + final_conv = nn.Conv1D(skip_channels, out_channels, 1, bias_attr=True) + nn.initializer.Constant(0.0)(final_conv.weight) self.last_conv_layers = nn.Sequential(nn.ReLU(), nn.Conv1D( skip_channels, skip_channels, 1, bias_attr=True), - nn.ReLU(), - nn.Conv1D( - skip_channels, - out_channels, - 1, - bias_attr=True)) + nn.ReLU(), final_conv) if use_weight_norm: self.apply_weight_norm() @@ -200,10 +197,6 @@ class GaussianDiffusion(nn.Layer): Args: denoiser (Layer, optional): The model used for denoising noises. - In fact, the denoiser model performs the operation - of producing a output with more noises from the noisy input. - Then we use the diffusion algorithm to calculate - the input with the output to get the denoised result. num_train_timesteps (int, optional): The number of timesteps between the noise and the real during training, by default 1000. beta_start (float, optional): @@ -233,7 +226,8 @@ class GaussianDiffusion(nn.Layer): >>> def callback(index, timestep, num_timesteps, sample): >>> nonlocal pbar >>> if pbar is None: - >>> pbar = tqdm(total=num_timesteps-index) + >>> pbar = tqdm(total=num_timesteps) + >>> pbar.update(index) >>> pbar.update() >>> >>> return callback @@ -247,7 +241,7 @@ class GaussianDiffusion(nn.Layer): >>> diffusion = GaussianDiffusion(denoiser, num_train_timesteps=ds, num_max_timesteps=K_step) >>> with paddle.no_grad(): >>> sample = diffusion.inference( - >>> paddle.randn(x.shape), c, x, + >>> paddle.randn(x.shape), c, ref_x=x_in, >>> num_inference_steps=infer_steps, >>> scheduler_type=scheduler_type, >>> callback=create_progress_callback()) @@ -262,7 +256,7 @@ class GaussianDiffusion(nn.Layer): >>> diffusion = GaussianDiffusion(denoiser, num_train_timesteps=ds, num_max_timesteps=K_step) >>> with paddle.no_grad(): >>> sample = diffusion.inference( - >>> paddle.randn(x.shape), c, x_in, + >>> paddle.randn(x.shape), c, ref_x=x_in, >>> num_inference_steps=infer_steps, >>> scheduler_type=scheduler_type, >>> callback=create_progress_callback()) @@ -277,11 +271,11 @@ class GaussianDiffusion(nn.Layer): >>> diffusion = GaussianDiffusion(denoiser, num_train_timesteps=ds, num_max_timesteps=K_step) >>> with paddle.no_grad(): >>> sample = diffusion.inference( - >>> paddle.randn(x.shape), c, None, + >>> paddle.randn(x.shape), c, ref_x=x_in, >>> num_inference_steps=infer_steps, >>> scheduler_type=scheduler_type, >>> callback=create_progress_callback()) - 100%|█████| 25/25 [00:01<00:00, 19.75it/s] + 100%|█████| 34/34 [00:01<00:00, 19.75it/s] >>> >>> # ds=1000, K_step=100, scheduler=pndm, infer_step=50, from aux fs2 mel output >>> ds = 1000 @@ -292,11 +286,11 @@ class GaussianDiffusion(nn.Layer): >>> diffusion = GaussianDiffusion(denoiser, num_train_timesteps=ds, num_max_timesteps=K_step) >>> with paddle.no_grad(): >>> sample = diffusion.inference( - >>> paddle.randn(x.shape), c, x, + >>> paddle.randn(x.shape), c, ref_x=x_in, >>> num_inference_steps=infer_steps, >>> scheduler_type=scheduler_type, >>> callback=create_progress_callback()) - 100%|█████| 5/5 [00:00<00:00, 23.80it/s] + 100%|█████| 14/14 [00:00<00:00, 23.80it/s] """