From bc5ae43d3a5ecdb3fbcea92d18a054ddc260816a Mon Sep 17 00:00:00 2001 From: TianYuan Date: Mon, 28 Mar 2022 04:49:10 +0000 Subject: [PATCH 1/2] restructure expand in length_regulator.py for paddle2onnx, test=tts --- examples/csmsc/tts3/README.md | 4 +++- .../t2s/modules/predictor/length_regulator.py | 24 ++++++++++++------- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/examples/csmsc/tts3/README.md b/examples/csmsc/tts3/README.md index 7b803526f..a31203f8a 100644 --- a/examples/csmsc/tts3/README.md +++ b/examples/csmsc/tts3/README.md @@ -227,7 +227,9 @@ Pretrained FastSpeech2 model with no silence in the edge of audios: - [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip) - [fastspeech2_conformer_baker_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_baker_ckpt_0.5.zip) -The static model can be downloaded here [fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip). +The static model can be downloaded here: +[fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip). +[fastspeech2_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_static_0.2.0.zip) Model | Step | eval/loss | eval/l1_loss | eval/duration_loss | eval/pitch_loss| eval/energy_loss :-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------: diff --git a/paddlespeech/t2s/modules/predictor/length_regulator.py b/paddlespeech/t2s/modules/predictor/length_regulator.py index 2472c413b..be788e6ed 100644 --- a/paddlespeech/t2s/modules/predictor/length_regulator.py +++ b/paddlespeech/t2s/modules/predictor/length_regulator.py @@ -73,15 +73,21 @@ class LengthRegulator(nn.Layer): batch_size, t_enc = paddle.shape(durations) slens = paddle.sum(durations, -1) t_dec = paddle.max(slens) - M = paddle.zeros([batch_size, t_dec, t_enc]) - for i in range(batch_size): - k = 0 - for j in range(t_enc): - d = durations[i, j] - # If the d == 0, slice action is meaningless and not supported in paddle - if d >= 1: - M[i, k:k + d, j] = 1 - k += d + t_dec_1 = t_dec + 1 + flatten_duration = paddle.cumsum( + paddle.reshape(durations, [batch_size * t_enc])) + 1 + init = paddle.zeros(t_dec_1) + m_batch = batch_size * t_enc + M = paddle.zeros([t_dec_1, m_batch]) + for i in range(m_batch): + d = flatten_duration[i] + m = paddle.concat( + [paddle.ones(d), paddle.zeros(t_dec_1 - d)], axis=0) + M[:, i] = m - init + init = m + M = paddle.reshape(M, shape=[t_dec_1, batch_size, t_enc]) + M = M[1:, :, :] + M = paddle.transpose(M, (1, 0, 2)) encodings = paddle.matmul(M, encodings) return encodings From e52fc08c586e1da29bb5ccf1ebaf6b639e21412d Mon Sep 17 00:00:00 2001 From: TianYuan Date: Mon, 28 Mar 2022 05:53:31 +0000 Subject: [PATCH 2/2] update readme, test=doc --- examples/csmsc/tts3/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/csmsc/tts3/README.md b/examples/csmsc/tts3/README.md index a31203f8a..08bf2ee91 100644 --- a/examples/csmsc/tts3/README.md +++ b/examples/csmsc/tts3/README.md @@ -228,8 +228,8 @@ Pretrained FastSpeech2 model with no silence in the edge of audios: - [fastspeech2_conformer_baker_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_baker_ckpt_0.5.zip) The static model can be downloaded here: -[fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip). -[fastspeech2_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_static_0.2.0.zip) +- [fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip) +- [fastspeech2_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_static_0.2.0.zip) Model | Step | eval/loss | eval/l1_loss | eval/duration_loss | eval/pitch_loss| eval/energy_loss :-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------: