From c5375cc4cad86844fc3553799379af3980534d6b Mon Sep 17 00:00:00 2001 From: liangym Date: Wed, 22 Feb 2023 03:35:31 +0000 Subject: [PATCH] add male onnx, test=tts --- docs/source/released_model.md | 7 +- paddlespeech/cli/tts/infer.py | 43 ++- paddlespeech/resource/pretrained_models.py | 288 ++++++++++++++++++++- paddlespeech/t2s/exps/inference.py | 5 + paddlespeech/t2s/exps/ort_predict_e2e.py | 5 + paddlespeech/t2s/exps/synthesize_e2e.py | 19 +- 6 files changed, 347 insertions(+), 20 deletions(-) diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 10a39e239..a63ea901f 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -61,7 +61,9 @@ FastSpeech2| AISHELL-3 |[fastspeech2-aishell3](https://github.com/PaddlePaddle/P FastSpeech2| LJSpeech |[fastspeech2-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts3)|[fastspeech2_nosil_ljspeech_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip)|[fastspeech2_ljspeech_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_static_1.1.0.zip)
[fastspeech2_ljspeech_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_onnx_1.1.0.zip)
[fastspeech2_ljspeech_pdlite_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_pdlite_1.3.0.zip)|145MB| FastSpeech2| VCTK |[fastspeech2-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/tts3)|[fastspeech2_vctk_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_ckpt_1.2.0.zip)|[fastspeech2_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_1.1.0.zip)
[fastspeech2_vctk_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip)
[fastspeech2_vctk_pdlite_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_pdlite_1.3.0.zip)| 145MB| FastSpeech2| ZH_EN |[fastspeech2-zh_en](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/zh_en_tts/tts3)|[fastspeech2_mix_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip)|[fastspeech2_mix_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip)
[fastspeech2_mix_onnx_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip) | 145MB| -FastSpeech2| Male ||[fastspeech2_male_ckpt_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_ckpt_1.3.0.zip)| | | +FastSpeech2| male-zh ||[fastspeech2_male_zh_ckpt_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_ckpt_1.4.0.zip)|[fastspeech2_male_zh_static_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_static_1.4.0.zip)
[fastspeech2_male_zh_onnx_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_onnx_1.4.0.zip) |146MB| +FastSpeech2| male-en ||[fastspeech2_male_en_ckpt_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_ckpt_1.4.0.zip)|[fastspeech2_male_en_static_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_static_1.4.0.zip)
[fastspeech2_male_en_onnx_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_onnx_1.4.0.zip) |145MB| +FastSpeech2| male-mix ||[fastspeech2_male_mix_ckpt_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_ckpt_1.4.0.zip)|[fastspeech2_male_mix_static_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_static_1.4.0.zip)
[fastspeech2_male_mix_onnx_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_onnx_1.4.0.zip) |146MB| ### Vocoders Model Type | Dataset| Example Link | Pretrained Models| Static / ONNX / Paddle-Lite Models|Size (static) @@ -78,7 +80,8 @@ HiFiGAN | LJSpeech |[HiFiGAN-ljspeech](https://github.com/PaddlePaddle/PaddleSpe HiFiGAN | AISHELL-3 |[HiFiGAN-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc5)|[hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip)|[hifigan_aishell3_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_static_1.1.0.zip)
[hifigan_aishell3_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_onnx_1.1.0.zip)
[hifigan_aishell3_pdlite_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_pdlite_1.3.0.zip)|46MB| HiFiGAN | VCTK |[HiFiGAN-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc5)|[hifigan_vctk_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip)|[hifigan_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_static_1.1.0.zip)
[hifigan_vctk_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_onnx_1.1.0.zip)
[hifigan_vctk_pdlite_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_pdlite_1.3.0.zip)|46MB| WaveRNN | CSMSC |[WaveRNN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc6)|[wavernn_csmsc_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_ckpt_0.2.0.zip)|[wavernn_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_static_0.2.0.zip)|18MB| -Parallel WaveGAN| Male ||[pwg_male_ckpt_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.3.0.zip)||| +Parallel WaveGAN| Male ||[pwg_male_ckpt_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.4.0.zip)|[pwgan_male_static_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_static_1.4.0.zip)
[pwgan_male_onnx_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_onnx_1.4.0.zip)|4.8MB| +HiFiGAN| Male ||[hifigan_male_ckpt_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_ckpt_1.4.0.zip)|[hifigan_male_static_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_static_1.4.0.zip)
[hifigan_male_onnx_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_onnx_1.4.0.zip)|46M| ### Voice Cloning diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py index 5515ade26..e95c85744 100644 --- a/paddlespeech/cli/tts/infer.py +++ b/paddlespeech/cli/tts/infer.py @@ -39,10 +39,24 @@ from paddlespeech.t2s.utils import str2bool __all__ = ['TTSExecutor'] ONNX_SUPPORT_SET = { - 'speedyspeech_csmsc', 'fastspeech2_csmsc', 'fastspeech2_ljspeech', - 'fastspeech2_aishell3', 'fastspeech2_vctk', 'pwgan_csmsc', 'pwgan_ljspeech', - 'pwgan_aishell3', 'pwgan_vctk', 'mb_melgan_csmsc', 'hifigan_csmsc', - 'hifigan_ljspeech', 'hifigan_aishell3', 'hifigan_vctk' + 'speedyspeech_csmsc', + 'fastspeech2_csmsc', + 'fastspeech2_ljspeech', + 'fastspeech2_aishell3', + 'fastspeech2_vctk', + 'fastspeech2_male', + 'fastspeech2_mix', + 'pwgan_csmsc', + 'pwgan_ljspeech', + 'pwgan_aishell3', + 'pwgan_vctk', + 'pwgan_male', + 'mb_melgan_csmsc', + 'hifigan_csmsc', + 'hifigan_ljspeech', + 'hifigan_aishell3', + 'hifigan_vctk', + 'hifigan_male', } @@ -124,6 +138,7 @@ class TTSExecutor(BaseExecutor): 'hifigan_vctk', 'wavernn_csmsc', 'pwgan_male', + 'hifigan_male', ], help='Choose vocoder type of tts task.') @@ -259,7 +274,11 @@ class TTSExecutor(BaseExecutor): voc_lang = lang # When speaker is 174 (csmsc), use csmsc's vocoder is better than aishell3's if lang == 'mix': - voc_lang = 'zh' + voc_dataset = voc[voc.rindex('_') + 1:] + if voc_dataset in {"ljspeech", "vctk"}: + voc_lang = 'en' + else: + voc_lang = 'zh' voc_tag = voc + '-' + voc_lang self.task_resource.set_task_model( model_tag=voc_tag, @@ -388,9 +407,12 @@ class TTSExecutor(BaseExecutor): else: use_pretrained_voc = False voc_lang = lang - # we must use ljspeech's voc for mix am now! 
if lang == 'mix': - voc_lang = 'en' + voc_dataset = voc[voc.rindex('_') + 1:] + if voc_dataset in {"ljspeech", "vctk"}: + voc_lang = 'en' + else: + voc_lang = 'zh' voc_tag = voc + '_onnx' + '-' + voc_lang self.task_resource.set_task_model( model_tag=voc_tag, @@ -500,10 +522,11 @@ class TTSExecutor(BaseExecutor): text=text, merge_sentences=merge_sentences, get_tone_ids=get_tone_ids, - lang=lang, - to_tensor=False) + lang=lang, ) self.frontend_time = time.time() - frontend_st phone_ids = frontend_dict['phone_ids'] + # onnx need numpy data as input + phone_ids = [phone_id.numpy() for phone_id in phone_ids] self.am_time = 0 self.voc_time = 0 flags = 0 @@ -512,7 +535,7 @@ class TTSExecutor(BaseExecutor): part_phone_ids = phone_ids[i] if am_name == 'fastspeech2': am_input_feed.update({'text': part_phone_ids}) - if am_dataset in {"aishell3", "vctk"}: + if am_dataset in {"aishell3", "vctk", "mix"}: # NOTE: 'spk_id' should be List[int] rather than int here!! am_input_feed.update({'spk_id': [spk_id]}) elif am_name == 'speedyspeech': diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py index ff0b30f6d..82c7776eb 100644 --- a/paddlespeech/resource/pretrained_models.py +++ b/paddlespeech/resource/pretrained_models.py @@ -994,9 +994,9 @@ tts_dynamic_pretrained_models = { "fastspeech2_male-zh": { '1.0': { 'url': - 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_ckpt_1.3.0.zip', + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_ckpt_1.4.0.zip', 'md5': - 'a4b1a2f667b878ec8f67375357b04282', + '43a9f4bc48a91f5a6f53017474e6c788', 'config': 'default.yaml', 'ckpt': @@ -1007,6 +1007,38 @@ tts_dynamic_pretrained_models = { 'phone_id_map.txt', }, }, + "fastspeech2_male-en": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_ckpt_1.4.0.zip', + 'md5': + 'cc9f44f1f20a8173f63e2d1d41ef1a9c', + 
'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_100000.pdz', + 'speech_stats': + 'speech_stats.npy', + 'phones_dict': + 'phone_id_map.txt', + }, + }, + "fastspeech2_male-mix": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_ckpt_1.4.0.zip', + 'md5': + '6d48ad60ef0ab2cee89a5d8cfd93dd86', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_177000.pdz', + 'speech_stats': + 'speech_stats.npy', + 'phones_dict': + 'phone_id_map.txt', + }, + }, # tacotron2 "tacotron2_csmsc-zh": { '1.0': { @@ -1100,9 +1132,9 @@ tts_dynamic_pretrained_models = { "pwgan_male-zh": { '1.0': { 'url': - 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.3.0.zip', + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.4.0.zip', 'md5': - 'c98cdb889c809973f8cc764437311132', + 'a443d6253bf9be377f27ae5972a03c65', 'config': 'default.yaml', 'ckpt': @@ -1198,6 +1230,20 @@ tts_dynamic_pretrained_models = { 'feats_stats.npy', }, }, + "hifigan_male-zh": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_ckpt_1.4.0.zip', + 'md5': + 'a709830596e102c2b83f8adc26d41d85', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_630000.pdz', + 'speech_stats': + 'feats_stats.npy', + }, + }, # wavernn "wavernn_csmsc-zh": { '1.0': { @@ -1214,6 +1260,15 @@ tts_dynamic_pretrained_models = { }, }, } +tts_dynamic_pretrained_models[ + "fastspeech2_mix-zh"] = tts_dynamic_pretrained_models[ + "fastspeech2_mix-en"] = tts_dynamic_pretrained_models[ + "fastspeech2_mix-mix"] +tts_dynamic_pretrained_models["pwgan_male-en"] = tts_dynamic_pretrained_models[ + "pwgan_male-mix"] = tts_dynamic_pretrained_models["pwgan_male-zh"] +tts_dynamic_pretrained_models[ + "hifigan_male-en"] = tts_dynamic_pretrained_models[ + "hifigan_male-mix"] = tts_dynamic_pretrained_models["hifigan_male-zh"] tts_static_pretrained_models = { # speedyspeech @@ 
-1304,6 +1359,88 @@ tts_static_pretrained_models = { 24000, }, }, + "fastspeech2_mix-mix": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen_static.zip', + 'md5': + 'b5001f66cccafdde07707e1b6269fa58', + 'model': + 'fastspeech2_mix.pdmodel', + 'params': + 'fastspeech2_mix.pdiparams', + 'phones_dict': + 'phone_id_map.txt', + 'speaker_dict': + 'speaker_id_map.txt', + 'sample_rate': + 24000, + }, + '2.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip', + 'md5': + 'c6dd138fab3ba261299c0b2efee51d5a', + 'model': + 'fastspeech2_mix.pdmodel', + 'params': + 'fastspeech2_mix.pdiparams', + 'phones_dict': + 'phone_id_map.txt', + 'speaker_dict': + 'speaker_id_map.txt', + 'sample_rate': + 24000, + }, + }, + "fastspeech2_male-zh": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_static_1.4.0.zip', + 'md5': + '9b7218829e7fa01aa33dbb2c5f6ef20f', + 'model': + 'fastspeech2_male-zh.pdmodel', + 'params': + 'fastspeech2_male-zh.pdiparams', + 'phones_dict': + 'phone_id_map.txt', + 'sample_rate': + 24000, + }, + }, + "fastspeech2_male-en": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_static_1.4.0.zip', + 'md5': + '33cea19b6821b371d242969ffd8b6cbf', + 'model': + 'fastspeech2_male-en.pdmodel', + 'params': + 'fastspeech2_male-en.pdiparams', + 'phones_dict': + 'phone_id_map.txt', + 'sample_rate': + 24000, + }, + }, + "fastspeech2_male-mix": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_static_1.4.0.zip', + 'md5': + '66585b04c0ced72f3cb82ee85b814d80', + 'model': + 'fastspeech2_male-mix.pdmodel', + 'params': + 'fastspeech2_male-mix.pdiparams', + 'phones_dict': + 'phone_id_map.txt', + 'sample_rate': + 24000, + }, + }, # pwgan "pwgan_csmsc-zh": { 
'1.0': { @@ -1361,6 +1498,20 @@ tts_static_pretrained_models = { 24000, }, }, + "pwgan_male-zh": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_static_1.4.0.zip', + 'md5': + '52a480ad35694b96603e0a92e9fb3f95', + 'model': + 'pwgan_male.pdmodel', + 'params': + 'pwgan_male.pdiparams', + 'sample_rate': + 24000, + }, + }, # mb_melgan "mb_melgan_csmsc-zh": { '1.0': { @@ -1433,8 +1584,31 @@ tts_static_pretrained_models = { 24000, }, }, + "hifigan_male-zh": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_static_1.4.0.zip', + 'md5': + '9011fa2738b501e909d1a61054bed29b', + 'model': + 'hifigan_male.pdmodel', + 'params': + 'hifigan_male.pdiparams', + 'sample_rate': + 24000, + }, + }, } +tts_static_pretrained_models[ + "fastspeech2_mix-zh"] = tts_static_pretrained_models[ + "fastspeech2_mix-en"] = tts_static_pretrained_models[ + "fastspeech2_mix-mix"] +tts_static_pretrained_models["pwgan_male-en"] = tts_static_pretrained_models[ + "pwgan_male-mix"] = tts_static_pretrained_models["pwgan_male-zh"] +tts_static_pretrained_models["hifigan_male-en"] = tts_static_pretrained_models[ + "hifigan_male-mix"] = tts_static_pretrained_models["hifigan_male-zh"] + tts_onnx_pretrained_models = { # speedyspeech "speedyspeech_csmsc_onnx-zh": { @@ -1533,6 +1707,78 @@ tts_onnx_pretrained_models = { 24000, }, }, + "fastspeech2_mix_onnx-mix": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen_onnx.zip', + 'md5': + '73052520202957920cf54700980933d0', + 'ckpt': + 'fastspeech2_mix.onnx', + 'phones_dict': + 'phone_id_map.txt', + 'speaker_dict': + 'speaker_id_map.txt', + 'sample_rate': + 24000, + }, + '2.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip', + 'md5': + '43b8ca5f85709c503777f808eb02a39e', + 'ckpt': + 'fastspeech2_mix.onnx', + 'phones_dict': + 
'phone_id_map.txt', + 'speaker_dict': + 'speaker_id_map.txt', + 'sample_rate': + 24000, + }, + }, + "fastspeech2_male_onnx-zh": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_onnx_1.4.0.zip', + 'md5': + '46c66f5ab86f4fcb493d899d9901c863', + 'ckpt': + 'fastspeech2_male-zh.onnx', + 'phones_dict': + 'phone_id_map.txt', + 'sample_rate': + 24000, + }, + }, + "fastspeech2_male_onnx-en": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_onnx_1.4.0.zip', + 'md5': + '401fb5cc31fdb25e22e901c9acba79c8', + 'ckpt': + 'fastspeech2_male-en.onnx', + 'phones_dict': + 'phone_id_map.txt', + 'sample_rate': + 24000, + }, + }, + "fastspeech2_male_onnx-mix": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_onnx_1.4.0.zip', + 'md5': + '07e51c5991c529b78603034547e9d0fa', + 'ckpt': + 'fastspeech2_male-mix.onnx', + 'phones_dict': + 'phone_id_map.txt', + 'sample_rate': + 24000, + }, + }, # pwgan "pwgan_csmsc_onnx-zh": { '1.0': { @@ -1582,6 +1828,18 @@ tts_onnx_pretrained_models = { 24000, }, }, + "pwgan_male_onnx-zh": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_onnx_1.4.0.zip', + 'md5': + '13163fd1326f555650dc7141d31767c3', + 'ckpt': + 'pwgan_male.onnx', + 'sample_rate': + 24000, + }, + }, # mb_melgan "mb_melgan_csmsc_onnx-zh": { '1.0': { @@ -1644,8 +1902,30 @@ tts_onnx_pretrained_models = { 24000, }, }, + "hifigan_male_onnx-zh": { + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_onnx_1.4.0.zip', + 'md5': + 'ec6b35417b1fe811d3b1641d4b527769', + 'ckpt': + 'hifigan_male.onnx', + 'sample_rate': + 24000, + }, + }, } +tts_onnx_pretrained_models[ + "fastspeech2_mix_onnx-zh"] = tts_onnx_pretrained_models[ + "fastspeech2_mix_onnx-en"] = tts_onnx_pretrained_models[ + 
"fastspeech2_mix_onnx-mix"] +tts_onnx_pretrained_models["pwgan_male_onnx-en"] = tts_onnx_pretrained_models[ + "pwgan_male_onnx-mix"] = tts_onnx_pretrained_models["pwgan_male_onnx-zh"] +tts_onnx_pretrained_models["hifigan_male_onnx-en"] = tts_onnx_pretrained_models[ + "hifigan_male_onnx-mix"] = tts_onnx_pretrained_models[ + "hifigan_male_onnx-zh"] + # --------------------------------- # ------------ Vector ------------- # --------------------------------- diff --git a/paddlespeech/t2s/exps/inference.py b/paddlespeech/t2s/exps/inference.py index 56dd7838a..d5c262243 100644 --- a/paddlespeech/t2s/exps/inference.py +++ b/paddlespeech/t2s/exps/inference.py @@ -42,6 +42,9 @@ def parse_args(): 'fastspeech2_vctk', 'tacotron2_csmsc', 'fastspeech2_mix', + 'fastspeech2_male-zh', + 'fastspeech2_male-en', + 'fastspeech2_male-mix', ], help='Choose acoustic model type of tts task.') parser.add_argument( @@ -71,6 +74,8 @@ def parse_args(): 'hifigan_ljspeech', 'hifigan_vctk', 'wavernn_csmsc', + 'pwgan_male', + 'hifigan_male', ], help='Choose vocoder type of tts task.') # other diff --git a/paddlespeech/t2s/exps/ort_predict_e2e.py b/paddlespeech/t2s/exps/ort_predict_e2e.py index 75284f7bb..91aa07e14 100644 --- a/paddlespeech/t2s/exps/ort_predict_e2e.py +++ b/paddlespeech/t2s/exps/ort_predict_e2e.py @@ -156,6 +156,9 @@ def parse_args(): 'fastspeech2_vctk', 'speedyspeech_csmsc', 'fastspeech2_mix', + 'fastspeech2_male-zh', + 'fastspeech2_male-en', + 'fastspeech2_male-mix', ], help='Choose acoustic model type of tts task.') parser.add_argument( @@ -183,6 +186,8 @@ def parse_args(): 'hifigan_ljspeech', 'hifigan_vctk', 'mb_melgan_csmsc', + 'pwgan_male', + 'hifigan_male', ], help='Choose vocoder type of tts task.') # other diff --git a/paddlespeech/t2s/exps/synthesize_e2e.py b/paddlespeech/t2s/exps/synthesize_e2e.py index 3b87d9e16..db94a6e53 100644 --- a/paddlespeech/t2s/exps/synthesize_e2e.py +++ b/paddlespeech/t2s/exps/synthesize_e2e.py @@ -165,10 +165,19 @@ def parse_args(): type=str, 
default='fastspeech2_csmsc', choices=[ - 'speedyspeech_csmsc', 'speedyspeech_aishell3', 'fastspeech2_csmsc', - 'fastspeech2_ljspeech', 'fastspeech2_aishell3', 'fastspeech2_vctk', - 'tacotron2_csmsc', 'tacotron2_ljspeech', 'fastspeech2_mix', - 'fastspeech2_canton' + 'speedyspeech_csmsc', + 'speedyspeech_aishell3', + 'fastspeech2_csmsc', + 'fastspeech2_ljspeech', + 'fastspeech2_aishell3', + 'fastspeech2_vctk', + 'tacotron2_csmsc', + 'tacotron2_ljspeech', + 'fastspeech2_mix', + 'fastspeech2_canton', + 'fastspeech2_male-zh', + 'fastspeech2_male-en', + 'fastspeech2_male-mix', ], help='Choose acoustic model type of tts task.') parser.add_argument( @@ -212,6 +221,8 @@ def parse_args(): 'hifigan_aishell3', 'hifigan_vctk', 'wavernn_csmsc', + 'pwgan_male', + 'hifigan_male', ], help='Choose vocoder type of tts task.') parser.add_argument(