From c5375cc4cad86844fc3553799379af3980534d6b Mon Sep 17 00:00:00 2001
From: liangym <lym0302@foxmail.com>
Date: Wed, 22 Feb 2023 03:35:31 +0000
Subject: [PATCH] add male onnx, test=tts

---
 docs/source/released_model.md              |   7 +-
 paddlespeech/cli/tts/infer.py              |  43 ++-
 paddlespeech/resource/pretrained_models.py | 288 ++++++++++++++++++++-
 paddlespeech/t2s/exps/inference.py         |   5 +
 paddlespeech/t2s/exps/ort_predict_e2e.py   |   5 +
 paddlespeech/t2s/exps/synthesize_e2e.py    |  19 +-
 6 files changed, 347 insertions(+), 20 deletions(-)
diff --git a/docs/source/released_model.md b/docs/source/released_model.md
index 10a39e239..a63ea901f 100644
--- a/docs/source/released_model.md
+++ b/docs/source/released_model.md
@@ -61,7 +61,9 @@ FastSpeech2| AISHELL-3 |[fastspeech2-aishell3](https://github.com/PaddlePaddle/P
 FastSpeech2| LJSpeech |[fastspeech2-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts3)|[fastspeech2_nosil_ljspeech_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip)|[fastspeech2_ljspeech_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_static_1.1.0.zip) </br> [fastspeech2_ljspeech_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_onnx_1.1.0.zip) </br> [fastspeech2_ljspeech_pdlite_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_pdlite_1.3.0.zip)|145MB|
 FastSpeech2| VCTK |[fastspeech2-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/tts3)|[fastspeech2_vctk_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_ckpt_1.2.0.zip)|[fastspeech2_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_1.1.0.zip) </br> [fastspeech2_vctk_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip) </br> [fastspeech2_vctk_pdlite_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_pdlite_1.3.0.zip)| 145MB|
 FastSpeech2| ZH_EN |[fastspeech2-zh_en](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/zh_en_tts/tts3)|[fastspeech2_mix_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip)|[fastspeech2_mix_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip) </br> [fastspeech2_mix_onnx_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip) | 145MB|
-FastSpeech2| Male ||[fastspeech2_male_ckpt_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_ckpt_1.3.0.zip)| | |
+FastSpeech2| male-zh ||[fastspeech2_male_zh_ckpt_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_ckpt_1.4.0.zip)|[fastspeech2_male_zh_static_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_static_1.4.0.zip) </br> [fastspeech2_male_zh_onnx_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_onnx_1.4.0.zip) |146MB|
+FastSpeech2| male-en ||[fastspeech2_male_en_ckpt_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_ckpt_1.4.0.zip)|[fastspeech2_male_en_static_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_static_1.4.0.zip) </br> [fastspeech2_male_en_onnx_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_onnx_1.4.0.zip) |145MB|
+FastSpeech2| male-mix ||[fastspeech2_male_mix_ckpt_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_ckpt_1.4.0.zip)|[fastspeech2_male_mix_static_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_static_1.4.0.zip) </br> [fastspeech2_male_mix_onnx_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_onnx_1.4.0.zip) |146MB|
 
 ### Vocoders
 Model Type | Dataset| Example Link | Pretrained Models| Static / ONNX / Paddle-Lite Models|Size (static)
@@ -78,7 +80,8 @@ HiFiGAN | LJSpeech |[HiFiGAN-ljspeech](https://github.com/PaddlePaddle/PaddleSpe
 HiFiGAN | AISHELL-3 |[HiFiGAN-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc5)|[hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip)|[hifigan_aishell3_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_static_1.1.0.zip) </br> [hifigan_aishell3_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_onnx_1.1.0.zip) </br> [hifigan_aishell3_pdlite_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_pdlite_1.3.0.zip)|46MB|
 HiFiGAN | VCTK |[HiFiGAN-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc5)|[hifigan_vctk_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip)|[hifigan_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_static_1.1.0.zip) </br> [hifigan_vctk_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_onnx_1.1.0.zip) </br> [hifigan_vctk_pdlite_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_pdlite_1.3.0.zip)|46MB|
 WaveRNN | CSMSC |[WaveRNN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc6)|[wavernn_csmsc_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_ckpt_0.2.0.zip)|[wavernn_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_static_0.2.0.zip)|18MB|
-Parallel WaveGAN| Male ||[pwg_male_ckpt_1.3.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.3.0.zip)|||
+Parallel WaveGAN| Male ||[pwg_male_ckpt_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.4.0.zip)|[pwgan_male_static_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_static_1.4.0.zip) </br> [pwgan_male_onnx_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_onnx_1.4.0.zip)|4.8MB|
+HiFiGAN| Male ||[hifigan_male_ckpt_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_ckpt_1.4.0.zip)|[hifigan_male_static_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_static_1.4.0.zip) </br> [hifigan_male_onnx_1.4.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_onnx_1.4.0.zip)|46MB|
 
 
 ### Voice Cloning
diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py
index 5515ade26..e95c85744 100644
--- a/paddlespeech/cli/tts/infer.py
+++ b/paddlespeech/cli/tts/infer.py
@@ -39,10 +39,24 @@ from paddlespeech.t2s.utils import str2bool
 
 __all__ = ['TTSExecutor']
 ONNX_SUPPORT_SET = {
-    'speedyspeech_csmsc', 'fastspeech2_csmsc', 'fastspeech2_ljspeech',
-    'fastspeech2_aishell3', 'fastspeech2_vctk', 'pwgan_csmsc', 'pwgan_ljspeech',
-    'pwgan_aishell3', 'pwgan_vctk', 'mb_melgan_csmsc', 'hifigan_csmsc',
-    'hifigan_ljspeech', 'hifigan_aishell3', 'hifigan_vctk'
+    'speedyspeech_csmsc',
+    'fastspeech2_csmsc',
+    'fastspeech2_ljspeech',
+    'fastspeech2_aishell3',
+    'fastspeech2_vctk',
+    'fastspeech2_male',
+    'fastspeech2_mix',
+    'pwgan_csmsc',
+    'pwgan_ljspeech',
+    'pwgan_aishell3',
+    'pwgan_vctk',
+    'pwgan_male',
+    'mb_melgan_csmsc',
+    'hifigan_csmsc',
+    'hifigan_ljspeech',
+    'hifigan_aishell3',
+    'hifigan_vctk',
+    'hifigan_male',
 }
 
 
@@ -124,6 +138,7 @@ class TTSExecutor(BaseExecutor):
                 'hifigan_vctk',
                 'wavernn_csmsc',
                 'pwgan_male',
+                'hifigan_male',
             ],
             help='Choose vocoder type of tts task.')
 
@@ -259,7 +274,11 @@ class TTSExecutor(BaseExecutor):
         voc_lang = lang
         # When speaker is 174 (csmsc), use csmsc's vocoder is better than aishell3's
         if lang == 'mix':
-            voc_lang = 'zh'
+            voc_dataset = voc[voc.rindex('_') + 1:]
+            if voc_dataset in {"ljspeech", "vctk"}:
+                voc_lang = 'en'
+            else:
+                voc_lang = 'zh'
         voc_tag = voc + '-' + voc_lang
         self.task_resource.set_task_model(
             model_tag=voc_tag,
@@ -388,9 +407,12 @@ class TTSExecutor(BaseExecutor):
         else:
             use_pretrained_voc = False
         voc_lang = lang
-        # we must use ljspeech's voc for mix am now!
         if lang == 'mix':
-            voc_lang = 'en'
+            voc_dataset = voc[voc.rindex('_') + 1:]
+            if voc_dataset in {"ljspeech", "vctk"}:
+                voc_lang = 'en'
+            else:
+                voc_lang = 'zh'
         voc_tag = voc + '_onnx' + '-' + voc_lang
         self.task_resource.set_task_model(
             model_tag=voc_tag,
@@ -500,10 +522,11 @@ class TTSExecutor(BaseExecutor):
             text=text,
             merge_sentences=merge_sentences,
             get_tone_ids=get_tone_ids,
-            lang=lang,
-            to_tensor=False)
+            lang=lang, )
         self.frontend_time = time.time() - frontend_st
         phone_ids = frontend_dict['phone_ids']
+        # onnx needs numpy data as input
+        phone_ids = [phone_id.numpy() for phone_id in phone_ids]
         self.am_time = 0
         self.voc_time = 0
         flags = 0
@@ -512,7 +535,7 @@ class TTSExecutor(BaseExecutor):
             part_phone_ids = phone_ids[i]
             if am_name == 'fastspeech2':
                 am_input_feed.update({'text': part_phone_ids})
-                if am_dataset in {"aishell3", "vctk"}:
+                if am_dataset in {"aishell3", "vctk", "mix"}:
                     # NOTE: 'spk_id' should be List[int] rather than int here!!
                     am_input_feed.update({'spk_id': [spk_id]})
             elif am_name == 'speedyspeech':
diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py
index ff0b30f6d..82c7776eb 100644
--- a/paddlespeech/resource/pretrained_models.py
+++ b/paddlespeech/resource/pretrained_models.py
@@ -994,9 +994,9 @@ tts_dynamic_pretrained_models = {
     "fastspeech2_male-zh": {
         '1.0': {
             'url':
-            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_ckpt_1.3.0.zip',
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_ckpt_1.4.0.zip',
             'md5':
-            'a4b1a2f667b878ec8f67375357b04282',
+            '43a9f4bc48a91f5a6f53017474e6c788',
             'config':
             'default.yaml',
             'ckpt':
@@ -1007,6 +1007,38 @@ tts_dynamic_pretrained_models = {
             'phone_id_map.txt',
         },
     },
+    "fastspeech2_male-en": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_ckpt_1.4.0.zip',
+            'md5':
+            'cc9f44f1f20a8173f63e2d1d41ef1a9c',
+            'config':
+            'default.yaml',
+            'ckpt':
+            'snapshot_iter_100000.pdz',
+            'speech_stats':
+            'speech_stats.npy',
+            'phones_dict':
+            'phone_id_map.txt',
+        },
+    },
+    "fastspeech2_male-mix": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_ckpt_1.4.0.zip',
+            'md5':
+            '6d48ad60ef0ab2cee89a5d8cfd93dd86',
+            'config':
+            'default.yaml',
+            'ckpt':
+            'snapshot_iter_177000.pdz',
+            'speech_stats':
+            'speech_stats.npy',
+            'phones_dict':
+            'phone_id_map.txt',
+        },
+    },
     # tacotron2
     "tacotron2_csmsc-zh": {
         '1.0': {
@@ -1100,9 +1132,9 @@ tts_dynamic_pretrained_models = {
     "pwgan_male-zh": {
         '1.0': {
             'url':
-            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.3.0.zip',
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.4.0.zip',
             'md5':
-            'c98cdb889c809973f8cc764437311132',
+            'a443d6253bf9be377f27ae5972a03c65',
             'config':
             'default.yaml',
             'ckpt':
@@ -1198,6 +1230,20 @@ tts_dynamic_pretrained_models = {
             'feats_stats.npy',
         },
     },
+    "hifigan_male-zh": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_ckpt_1.4.0.zip',
+            'md5':
+            'a709830596e102c2b83f8adc26d41d85',
+            'config':
+            'default.yaml',
+            'ckpt':
+            'snapshot_iter_630000.pdz',
+            'speech_stats':
+            'feats_stats.npy',
+        },
+    },
     # wavernn
     "wavernn_csmsc-zh": {
         '1.0': {
@@ -1214,6 +1260,15 @@ tts_dynamic_pretrained_models = {
         },
     },
 }
+tts_dynamic_pretrained_models[
+    "fastspeech2_mix-zh"] = tts_dynamic_pretrained_models[
+        "fastspeech2_mix-en"] = tts_dynamic_pretrained_models[
+            "fastspeech2_mix-mix"]
+tts_dynamic_pretrained_models["pwgan_male-en"] = tts_dynamic_pretrained_models[
+    "pwgan_male-mix"] = tts_dynamic_pretrained_models["pwgan_male-zh"]
+tts_dynamic_pretrained_models[
+    "hifigan_male-en"] = tts_dynamic_pretrained_models[
+        "hifigan_male-mix"] = tts_dynamic_pretrained_models["hifigan_male-zh"]
 
 tts_static_pretrained_models = {
     # speedyspeech
@@ -1304,6 +1359,88 @@ tts_static_pretrained_models = {
             24000,
         },
     },
+    "fastspeech2_mix-mix": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen_static.zip',
+            'md5':
+            'b5001f66cccafdde07707e1b6269fa58',
+            'model':
+            'fastspeech2_mix.pdmodel',
+            'params':
+            'fastspeech2_mix.pdiparams',
+            'phones_dict':
+            'phone_id_map.txt',
+            'speaker_dict':
+            'speaker_id_map.txt',
+            'sample_rate':
+            24000,
+        },
+        '2.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip',
+            'md5':
+            'c6dd138fab3ba261299c0b2efee51d5a',
+            'model':
+            'fastspeech2_mix.pdmodel',
+            'params':
+            'fastspeech2_mix.pdiparams',
+            'phones_dict':
+            'phone_id_map.txt',
+            'speaker_dict':
+            'speaker_id_map.txt',
+            'sample_rate':
+            24000,
+        },
+    },
+    "fastspeech2_male-zh": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_static_1.4.0.zip',
+            'md5':
+            '9b7218829e7fa01aa33dbb2c5f6ef20f',
+            'model':
+            'fastspeech2_male-zh.pdmodel',
+            'params':
+            'fastspeech2_male-zh.pdiparams',
+            'phones_dict':
+            'phone_id_map.txt',
+            'sample_rate':
+            24000,
+        },
+    },
+    "fastspeech2_male-en": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_static_1.4.0.zip',
+            'md5':
+            '33cea19b6821b371d242969ffd8b6cbf',
+            'model':
+            'fastspeech2_male-en.pdmodel',
+            'params':
+            'fastspeech2_male-en.pdiparams',
+            'phones_dict':
+            'phone_id_map.txt',
+            'sample_rate':
+            24000,
+        },
+    },
+    "fastspeech2_male-mix": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_static_1.4.0.zip',
+            'md5':
+            '66585b04c0ced72f3cb82ee85b814d80',
+            'model':
+            'fastspeech2_male-mix.pdmodel',
+            'params':
+            'fastspeech2_male-mix.pdiparams',
+            'phones_dict':
+            'phone_id_map.txt',
+            'sample_rate':
+            24000,
+        },
+    },
     # pwgan
     "pwgan_csmsc-zh": {
         '1.0': {
@@ -1361,6 +1498,20 @@ tts_static_pretrained_models = {
             24000,
         },
     },
+    "pwgan_male-zh": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_static_1.4.0.zip',
+            'md5':
+            '52a480ad35694b96603e0a92e9fb3f95',
+            'model':
+            'pwgan_male.pdmodel',
+            'params':
+            'pwgan_male.pdiparams',
+            'sample_rate':
+            24000,
+        },
+    },
     # mb_melgan
     "mb_melgan_csmsc-zh": {
         '1.0': {
@@ -1433,8 +1584,31 @@ tts_static_pretrained_models = {
             24000,
         },
     },
+    "hifigan_male-zh": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_static_1.4.0.zip',
+            'md5':
+            '9011fa2738b501e909d1a61054bed29b',
+            'model':
+            'hifigan_male.pdmodel',
+            'params':
+            'hifigan_male.pdiparams',
+            'sample_rate':
+            24000,
+        },
+    },
 }
 
+tts_static_pretrained_models[
+    "fastspeech2_mix-zh"] = tts_static_pretrained_models[
+        "fastspeech2_mix-en"] = tts_static_pretrained_models[
+            "fastspeech2_mix-mix"]
+tts_static_pretrained_models["pwgan_male-en"] = tts_static_pretrained_models[
+    "pwgan_male-mix"] = tts_static_pretrained_models["pwgan_male-zh"]
+tts_static_pretrained_models["hifigan_male-en"] = tts_static_pretrained_models[
+    "hifigan_male-mix"] = tts_static_pretrained_models["hifigan_male-zh"]
+
 tts_onnx_pretrained_models = {
     # speedyspeech
     "speedyspeech_csmsc_onnx-zh": {
@@ -1533,6 +1707,78 @@ tts_onnx_pretrained_models = {
             24000,
         },
     },
+    "fastspeech2_mix_onnx-mix": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen_onnx.zip',
+            'md5':
+            '73052520202957920cf54700980933d0',
+            'ckpt':
+            'fastspeech2_mix.onnx',
+            'phones_dict':
+            'phone_id_map.txt',
+            'speaker_dict':
+            'speaker_id_map.txt',
+            'sample_rate':
+            24000,
+        },
+        '2.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip',
+            'md5':
+            '43b8ca5f85709c503777f808eb02a39e',
+            'ckpt':
+            'fastspeech2_mix.onnx',
+            'phones_dict':
+            'phone_id_map.txt',
+            'speaker_dict':
+            'speaker_id_map.txt',
+            'sample_rate':
+            24000,
+        },
+    },
+    "fastspeech2_male_onnx-zh": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_onnx_1.4.0.zip',
+            'md5':
+            '46c66f5ab86f4fcb493d899d9901c863',
+            'ckpt':
+            'fastspeech2_male-zh.onnx',
+            'phones_dict':
+            'phone_id_map.txt',
+            'sample_rate':
+            24000,
+        },
+    },
+    "fastspeech2_male_onnx-en": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_onnx_1.4.0.zip',
+            'md5':
+            '401fb5cc31fdb25e22e901c9acba79c8',
+            'ckpt':
+            'fastspeech2_male-en.onnx',
+            'phones_dict':
+            'phone_id_map.txt',
+            'sample_rate':
+            24000,
+        },
+    },
+    "fastspeech2_male_onnx-mix": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_onnx_1.4.0.zip',
+            'md5':
+            '07e51c5991c529b78603034547e9d0fa',
+            'ckpt':
+            'fastspeech2_male-mix.onnx',
+            'phones_dict':
+            'phone_id_map.txt',
+            'sample_rate':
+            24000,
+        },
+    },
     # pwgan
     "pwgan_csmsc_onnx-zh": {
         '1.0': {
@@ -1582,6 +1828,18 @@ tts_onnx_pretrained_models = {
             24000,
         },
     },
+    "pwgan_male_onnx-zh": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_onnx_1.4.0.zip',
+            'md5':
+            '13163fd1326f555650dc7141d31767c3',
+            'ckpt':
+            'pwgan_male.onnx',
+            'sample_rate':
+            24000,
+        },
+    },
     # mb_melgan
     "mb_melgan_csmsc_onnx-zh": {
         '1.0': {
@@ -1644,8 +1902,30 @@ tts_onnx_pretrained_models = {
             24000,
         },
     },
+    "hifigan_male_onnx-zh": {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_onnx_1.4.0.zip',
+            'md5':
+            'ec6b35417b1fe811d3b1641d4b527769',
+            'ckpt':
+            'hifigan_male.onnx',
+            'sample_rate':
+            24000,
+        },
+    },
 }
 
+tts_onnx_pretrained_models[
+    "fastspeech2_mix_onnx-zh"] = tts_onnx_pretrained_models[
+        "fastspeech2_mix_onnx-en"] = tts_onnx_pretrained_models[
+            "fastspeech2_mix_onnx-mix"]
+tts_onnx_pretrained_models["pwgan_male_onnx-en"] = tts_onnx_pretrained_models[
+    "pwgan_male_onnx-mix"] = tts_onnx_pretrained_models["pwgan_male_onnx-zh"]
+tts_onnx_pretrained_models["hifigan_male_onnx-en"] = tts_onnx_pretrained_models[
+    "hifigan_male_onnx-mix"] = tts_onnx_pretrained_models[
+        "hifigan_male_onnx-zh"]
+
 # ---------------------------------
 # ------------ Vector -------------
 # ---------------------------------
diff --git a/paddlespeech/t2s/exps/inference.py b/paddlespeech/t2s/exps/inference.py
index 56dd7838a..d5c262243 100644
--- a/paddlespeech/t2s/exps/inference.py
+++ b/paddlespeech/t2s/exps/inference.py
@@ -42,6 +42,9 @@ def parse_args():
             'fastspeech2_vctk',
             'tacotron2_csmsc',
             'fastspeech2_mix',
+            'fastspeech2_male-zh',
+            'fastspeech2_male-en',
+            'fastspeech2_male-mix',
         ],
         help='Choose acoustic model type of tts task.')
     parser.add_argument(
@@ -71,6 +74,8 @@ def parse_args():
             'hifigan_ljspeech',
             'hifigan_vctk',
             'wavernn_csmsc',
+            'pwgan_male',
+            'hifigan_male',
         ],
         help='Choose vocoder type of tts task.')
     # other
diff --git a/paddlespeech/t2s/exps/ort_predict_e2e.py b/paddlespeech/t2s/exps/ort_predict_e2e.py
index 75284f7bb..91aa07e14 100644
--- a/paddlespeech/t2s/exps/ort_predict_e2e.py
+++ b/paddlespeech/t2s/exps/ort_predict_e2e.py
@@ -156,6 +156,9 @@ def parse_args():
             'fastspeech2_vctk',
             'speedyspeech_csmsc',
             'fastspeech2_mix',
+            'fastspeech2_male-zh',
+            'fastspeech2_male-en',
+            'fastspeech2_male-mix',
         ],
         help='Choose acoustic model type of tts task.')
     parser.add_argument(
@@ -183,6 +186,8 @@ def parse_args():
             'hifigan_ljspeech',
             'hifigan_vctk',
             'mb_melgan_csmsc',
+            'pwgan_male',
+            'hifigan_male',
         ],
         help='Choose vocoder type of tts task.')
     # other
diff --git a/paddlespeech/t2s/exps/synthesize_e2e.py b/paddlespeech/t2s/exps/synthesize_e2e.py
index 3b87d9e16..db94a6e53 100644
--- a/paddlespeech/t2s/exps/synthesize_e2e.py
+++ b/paddlespeech/t2s/exps/synthesize_e2e.py
@@ -165,10 +165,19 @@ def parse_args():
         type=str,
         default='fastspeech2_csmsc',
         choices=[
-            'speedyspeech_csmsc', 'speedyspeech_aishell3', 'fastspeech2_csmsc',
-            'fastspeech2_ljspeech', 'fastspeech2_aishell3', 'fastspeech2_vctk',
-            'tacotron2_csmsc', 'tacotron2_ljspeech', 'fastspeech2_mix',
-            'fastspeech2_canton'
+            'speedyspeech_csmsc',
+            'speedyspeech_aishell3',
+            'fastspeech2_csmsc',
+            'fastspeech2_ljspeech',
+            'fastspeech2_aishell3',
+            'fastspeech2_vctk',
+            'tacotron2_csmsc',
+            'tacotron2_ljspeech',
+            'fastspeech2_mix',
+            'fastspeech2_canton',
+            'fastspeech2_male-zh',
+            'fastspeech2_male-en',
+            'fastspeech2_male-mix',
         ],
         help='Choose acoustic model type of tts task.')
     parser.add_argument(
@@ -212,6 +221,8 @@ def parse_args():
             'hifigan_aishell3',
             'hifigan_vctk',
             'wavernn_csmsc',
+            'pwgan_male',
+            'hifigan_male',
         ],
         help='Choose vocoder type of tts task.')
     parser.add_argument(