From 4c30cd6eb180998c4d01ac60b23ea3d2a6729fd7 Mon Sep 17 00:00:00 2001 From: 0x45f Date: Thu, 23 Feb 2023 11:47:16 +0000 Subject: [PATCH] Support dy2st for VITS --- paddlespeech/t2s/exps/vits/synthesize_e2e.py | 4 ++-- paddlespeech/t2s/models/vits/transform.py | 19 +++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/paddlespeech/t2s/exps/vits/synthesize_e2e.py b/paddlespeech/t2s/exps/vits/synthesize_e2e.py index f9c8b3f71..eb3cad034 100644 --- a/paddlespeech/t2s/exps/vits/synthesize_e2e.py +++ b/paddlespeech/t2s/exps/vits/synthesize_e2e.py @@ -64,7 +64,6 @@ def evaluate(args): vits = VITS(idim=vocab_size, odim=odim, **config["model"]) vits.set_state_dict(paddle.load(args.ckpt)["main_params"]) vits.eval() - VITSInference vits_inference = VITSInference(vits) # whether dygraph to static @@ -108,7 +107,8 @@ def evaluate(args): spk_id = None if spk_num is not None: spk_id = paddle.to_tensor(args.spk_id) - wav = vits_inference(text=part_phone_ids, sids=spk_id) + # wav = vits_inference(text=part_phone_ids, sids=spk_id) + wav = vits_inference(part_phone_ids) if flags == 0: wav_all = wav flags = 1 diff --git a/paddlespeech/t2s/models/vits/transform.py b/paddlespeech/t2s/models/vits/transform.py index 8c6227a6a..360453cca 100644 --- a/paddlespeech/t2s/models/vits/transform.py +++ b/paddlespeech/t2s/models/vits/transform.py @@ -82,8 +82,10 @@ def unconstrained_rational_quadratic_spline( inside_interval_mask = (inputs >= -tail_bound) & (inputs <= tail_bound) outside_interval_mask = ~inside_interval_mask - outputs = paddle.zeros(paddle.shape(inputs)) - logabsdet = paddle.zeros(paddle.shape(inputs)) + # outputs = paddle.zeros(paddle.shape(inputs)) + # logabsdet = paddle.zeros(paddle.shape(inputs)) + outputs = paddle.zeros(inputs.shape) + logabsdet = paddle.zeros(inputs.shape) if tails == "linear": unnormalized_derivatives = F.pad( unnormalized_derivatives, @@ -140,15 +142,16 @@ def rational_quadratic_spline( min_bin_width=1e-3, min_bin_height=1e-3, min_derivative=1e-3, ): - if paddle.min(inputs) < left or paddle.max(inputs) > right: - raise ValueError("Input to a transform is not within its domain") + + # if paddle.min(inputs) < left or paddle.max(inputs) > right: + # raise ValueError("Input to a transform is not within its domain") num_bins = unnormalized_widths.shape[-1] - if min_bin_width * num_bins > 1.0: - raise ValueError("Minimal bin width too large for the number of bins") - if min_bin_height * num_bins > 1.0: - raise ValueError("Minimal bin height too large for the number of bins") + # if min_bin_width * num_bins > 1.0: + # raise ValueError("Minimal bin width too large for the number of bins") + # if min_bin_height * num_bins > 1.0: + # raise ValueError("Minimal bin height too large for the number of bins") widths = F.softmax(unnormalized_widths, axis=-1) widths = min_bin_width + (1 - min_bin_width * num_bins) * widths