restructure expand in length_regulator.py for paddle2onnx, test=tts

pull/1613/head
TianYuan 3 years ago
parent e6e72b445a
commit bc5ae43d3a

@ -227,7 +227,9 @@ Pretrained FastSpeech2 model with no silence in the edge of audios:
- [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip)
- [fastspeech2_conformer_baker_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_baker_ckpt_0.5.zip)
The static model can be downloaded here [fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip).
The static model can be downloaded here:
- [fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip)
- [fastspeech2_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_static_0.2.0.zip)
Model | Step | eval/loss | eval/l1_loss | eval/duration_loss | eval/pitch_loss| eval/energy_loss
:-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------:

@ -73,15 +73,21 @@ class LengthRegulator(nn.Layer):
batch_size, t_enc = paddle.shape(durations)
slens = paddle.sum(durations, -1)
t_dec = paddle.max(slens)
M = paddle.zeros([batch_size, t_dec, t_enc])
for i in range(batch_size):
k = 0
for j in range(t_enc):
d = durations[i, j]
# If d == 0, the slice assignment is meaningless and is not supported in paddle
if d >= 1:
M[i, k:k + d, j] = 1
k += d
t_dec_1 = t_dec + 1
flatten_duration = paddle.cumsum(
paddle.reshape(durations, [batch_size * t_enc])) + 1
init = paddle.zeros(t_dec_1)
m_batch = batch_size * t_enc
M = paddle.zeros([t_dec_1, m_batch])
for i in range(m_batch):
d = flatten_duration[i]
m = paddle.concat(
[paddle.ones(d), paddle.zeros(t_dec_1 - d)], axis=0)
M[:, i] = m - init
init = m
M = paddle.reshape(M, shape=[t_dec_1, batch_size, t_enc])
M = M[1:, :, :]
M = paddle.transpose(M, (1, 0, 2))
encodings = paddle.matmul(M, encodings)
return encodings

Loading…
Cancel
Save