diff --git a/README.md b/README.md index 0a8566940..3d83c1a16 100644 --- a/README.md +++ b/README.md @@ -157,6 +157,7 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision - 🧩 *Cascaded models application*: as an extension of the typical traditional audio tasks, we combine the workflows of the aforementioned tasks with other fields like Natural language processing (NLP) and Computer Vision (CV). ### Recent Update +- 🎉 2022.11.17: [Add male voice for TTS](https://github.com/PaddlePaddle/PaddleSpeech/pull/2660) - 🔥 2022.11.07: [U2/U2++ C++ High Performance Streaming Asr Deployment](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/speechx/examples/u2pp_ol/wenetspeech). - 👑 2022.11.01: Add [Adversarial Loss](https://arxiv.org/pdf/1907.04448.pdf) for [Chinese English mixed TTS](./examples/zh_en_tts/tts3). - 🔥 2022.10.26: Add [Prosody Prediction](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/rhy) for TTS. diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 79e8f4f46..ee68f934f 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -53,6 +53,8 @@ FastSpeech2| AISHELL-3 |[fastspeech2-aishell3](https://github.com/PaddlePaddle/P FastSpeech2| LJSpeech |[fastspeech2-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts3)|[fastspeech2_nosil_ljspeech_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip)|[fastspeech2_ljspeech_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_static_1.1.0.zip)
[fastspeech2_ljspeech_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_onnx_1.1.0.zip)|145MB| FastSpeech2| VCTK |[fastspeech2-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/tts3)|[fastspeech2_vctk_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_ckpt_1.2.0.zip)|[fastspeech2_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_1.1.0.zip)
[fastspeech2_vctk_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip) | 145MB| FastSpeech2| ZH_EN |[fastspeech2-zh_en](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/zh_en_tts/tts3)|[fastspeech2_mix_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip)|[fastspeech2_mix_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip)
[fastspeech2_mix_onnx_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip) | 145MB| +FastSpeech2| Male ||[fastspeech2_male_ckpt_1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_ckpt_1.0.zip)| | | + ### Vocoders @@ -70,6 +72,7 @@ HiFiGAN | LJSpeech |[HiFiGAN-ljspeech](https://github.com/PaddlePaddle/PaddleSpe HiFiGAN | AISHELL-3 |[HiFiGAN-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc5)|[hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip)|[hifigan_aishell3_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_static_1.1.0.zip)
[hifigan_aishell3_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_onnx_1.1.0.zip)|46MB| HiFiGAN | VCTK |[HiFiGAN-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc5)|[hifigan_vctk_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip)|[hifigan_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_static_1.1.0.zip)
[hifigan_vctk_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_onnx_1.1.0.zip)|46MB| WaveRNN | CSMSC |[WaveRNN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc6)|[wavernn_csmsc_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_ckpt_0.2.0.zip)|[wavernn_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_static_0.2.0.zip)|18MB| +Parallel WaveGAN| Male ||[pwg_male_ckpt_1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.0.zip)||| ### Voice Cloning diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py index 3eb597156..707518c05 100644 --- a/paddlespeech/cli/tts/infer.py +++ b/paddlespeech/cli/tts/infer.py @@ -67,6 +67,7 @@ class TTSExecutor(BaseExecutor): 'fastspeech2_mix', 'tacotron2_csmsc', 'tacotron2_ljspeech', + 'fastspeech2_male', ], help='Choose acoustic model type of tts task.') self.parser.add_argument( @@ -122,6 +123,7 @@ class TTSExecutor(BaseExecutor): 'hifigan_aishell3', 'hifigan_vctk', 'wavernn_csmsc', + 'pwgan_male', ], help='Choose vocoder type of tts task.') diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py index df50a6a9d..777aec913 100644 --- a/paddlespeech/resource/pretrained_models.py +++ b/paddlespeech/resource/pretrained_models.py @@ -723,6 +723,22 @@ tts_dynamic_pretrained_models = { 'speaker_id_map.txt', }, }, + "fastspeech2_male-zh": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_ckpt_1.0.zip', + 'md5': + 'e7a42e9f126deb164ee902e4d14a64d3', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_76000.pdz', + 'speech_stats': + 'speech_stats.npy', + 'phones_dict': + 'phone_id_map.txt', + }, + }, # tacotron2 "tacotron2_csmsc-zh": { '1.0': { @@ -813,6 +829,20 @@ tts_dynamic_pretrained_models = { 'feats_stats.npy', }, }, + "pwgan_male-zh": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.0.zip', + 'md5': + '26ae7eb8c52741a90e863f0443097f80', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_200000.pdz', + 'speech_stats': + 'feats_stats.npy', + }, + }, # mb_melgan "mb_melgan_csmsc-zh": { '1.0': { diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh index d571aa78f..cb24fa614 100755 --- a/tests/unit/cli/test_cli.sh +++ b/tests/unit/cli/test_cli.sh @@ -54,6 +54,7 @@ paddlespeech tts --am fastspeech2_vctk --voc hifigan_vctk --input "Life was like paddlespeech tts --am tacotron2_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --am tacotron2_csmsc --voc wavernn_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --am tacotron2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." +paddlespeech tts --am fastspeech2_male --voc pwgan_male --input "你好,欢迎使用百度飞桨深度学习框架!" # mix tts # The `am` must be `fastspeech2_mix`! # The `lang` must be `mix`!