From c116a3a92644a6fcbf0e2346d0077bb7c3b3c50c Mon Sep 17 00:00:00 2001
From: Jerryuhoo
Date: Wed, 2 Mar 2022 09:41:18 +0800
Subject: [PATCH] fix Speedyspeech multi-speaker inference, test=tts

---
 paddlespeech/t2s/exps/synthesize_e2e.py              | 8 ++++----
 paddlespeech/t2s/models/speedyspeech/speedyspeech.py | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/paddlespeech/t2s/exps/synthesize_e2e.py b/paddlespeech/t2s/exps/synthesize_e2e.py
index 75c631b8..514d4822 100644
--- a/paddlespeech/t2s/exps/synthesize_e2e.py
+++ b/paddlespeech/t2s/exps/synthesize_e2e.py
@@ -194,10 +194,10 @@ def evaluate(args):
                 am_inference = jit.to_static(
                     am_inference,
                     input_spec=[
-                        InputSpec([-1], dtype=paddle.int64),  # text
-                        InputSpec([-1], dtype=paddle.int64),  # tone
-                        None,  # duration
-                        InputSpec([-1], dtype=paddle.int64)  # spk_id
+                        InputSpec([-1], dtype=paddle.int64),  # text
+                        InputSpec([-1], dtype=paddle.int64),  # tone
+                        InputSpec([1], dtype=paddle.int64),  # spk_id
+                        None  # duration
                     ])
             else:
                 am_inference = jit.to_static(
diff --git a/paddlespeech/t2s/models/speedyspeech/speedyspeech.py b/paddlespeech/t2s/models/speedyspeech/speedyspeech.py
index 42e8f743..44ccfc60 100644
--- a/paddlespeech/t2s/models/speedyspeech/speedyspeech.py
+++ b/paddlespeech/t2s/models/speedyspeech/speedyspeech.py
@@ -247,7 +247,7 @@ class SpeedySpeechInference(nn.Layer):
         self.normalizer = normalizer
         self.acoustic_model = speedyspeech_model
 
-    def forward(self, phones, tones, durations=None, spk_id=None):
+    def forward(self, phones, tones, spk_id=None, durations=None):
         normalized_mel = self.acoustic_model.inference(
             phones, tones, durations=durations, spk_id=spk_id)
         logmel = self.normalizer.inverse(normalized_mel)