From 5d515f3f3f567497533483ad03dc71c8f9ee9d0a Mon Sep 17 00:00:00 2001 From: TianYuan Date: Mon, 15 Aug 2022 06:41:53 +0000 Subject: [PATCH] update mix tts --- paddlespeech/cli/tts/infer.py | 4 ++-- paddlespeech/resource/pretrained_models.py | 16 ++++++++++++++++ tests/unit/cli/test_cli.sh | 10 +++++++--- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py index 1b02192e..3eb59715 100644 --- a/paddlespeech/cli/tts/infer.py +++ b/paddlespeech/cli/tts/infer.py @@ -255,9 +255,9 @@ class TTSExecutor(BaseExecutor): else: use_pretrained_voc = False voc_lang = lang - # we must use ljspeech's voc for mix am now! + # When the speaker is 174 (csmsc), csmsc's vocoder works better than aishell3's if lang == 'mix': - voc_lang = 'en' + voc_lang = 'zh' voc_tag = voc + '-' + voc_lang self.task_resource.set_task_model( model_tag=voc_tag, diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py index e39f7721..9d9be0ac 100644 --- a/paddlespeech/resource/pretrained_models.py +++ b/paddlespeech/resource/pretrained_models.py @@ -672,6 +672,22 @@ tts_dynamic_pretrained_models = { 'speaker_dict': 'speaker_id_map.txt', }, + '2.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_0.2.0.zip', + 'md5': + '1d938e104e972386c8bfcbcc98a91587', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_99200.pdz', + 'speech_stats': + 'speech_stats.npy', + 'phones_dict': + 'phone_id_map.txt', + 'speaker_dict': + 'speaker_id_map.txt', + }, }, # tacotron2 "tacotron2_csmsc-zh": { diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh index 4d2ed1b8..15604961 100755 --- a/tests/unit/cli/test_cli.sh +++ b/tests/unit/cli/test_cli.sh @@ -56,9 +56,13 @@ paddlespeech tts --am tacotron2_ljspeech --voc pwgan_ljspeech --lang en --input # mix tts # The `am` must be `fastspeech2_mix`! # The `lang` must be `mix`! 
-# The voc must be `hifigan_ljspeech` or `pwgan_ljspeech` for f`astspeech2_mix` now! -paddlespeech tts --am fastspeech2_mix --voc hifigan_ljspeech --lang mix --input "热烈欢迎您在 Discussions 中提交问题,并在 Issues 中指出发现的 bug。此外,我们非常希望您参与到 Paddle Speech 的开发中!" --spk_id 0 --output mix_spk0.wav -paddlespeech tts --am fastspeech2_mix --voc pwgan_ljspeech --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 1 --output mix_spk1.wav +# The voc must be Chinese datasets' voc now! +# spk 174 is csmsc, spk 175 is ljspeech +paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "热烈欢迎您在 Discussions 中提交问题,并在 Issues 中指出发现的 bug。此外,我们非常希望您参与到 Paddle Speech 的开发中!" --spk_id 174 --output mix_spk174.wav +paddlespeech tts --am fastspeech2_mix --voc hifigan_aishell3 --lang mix --input "热烈欢迎您在 Discussions 中提交问题,并在 Issues 中指出发现的 bug。此外,我们非常希望您参与到 Paddle Speech 的开发中!" --spk_id 174 --output mix_spk174_aishell3.wav +paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175_pwgan.wav +paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175.wav + # Speech Translation (only support linux) paddlespeech st --input ./en.wav