Merge pull request #1197 from jerryuhoo/develop

Add speaker embedding and speaker ID support for StyleFastSpeech2 inference
pull/1202/head
TianYuan 3 years ago committed by GitHub
commit de8e09fd97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -907,7 +907,9 @@ class StyleFastSpeech2Inference(FastSpeech2Inference):
energy: Union[paddle.Tensor, np.ndarray]=None,
energy_scale: Union[int, float]=None,
energy_bias: Union[int, float]=None,
robot: bool=False):
robot: bool=False,
spk_emb=None,
spk_id=None):
"""
Parameters
----------
@ -938,8 +940,9 @@ class StyleFastSpeech2Inference(FastSpeech2Inference):
Tensor
Output sequence of features (L, odim).
"""
spk_id = paddle.to_tensor(spk_id)
normalized_mel, d_outs, p_outs, e_outs = self.acoustic_model.inference(
text, durations=None, pitch=None, energy=None)
text, durations=None, pitch=None, energy=None, spk_emb=spk_emb, spk_id=spk_id)
# priority: groundtruth > scale/bias > previous output
# set durations
if isinstance(durations, np.ndarray):
@ -991,7 +994,10 @@ class StyleFastSpeech2Inference(FastSpeech2Inference):
durations=durations,
pitch=pitch,
energy=energy,
use_teacher_forcing=True)
use_teacher_forcing=True,
spk_emb=spk_emb,
spk_id=spk_id
)
logmel = self.normalizer.inverse(normalized_mel)
return logmel

Loading…
Cancel
Save